ovs/lib/dpif-netdev.c

/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "dpif.h"

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "classifier.h"
#include "csum.h"
#include "dpif.h"
#include "dpif-provider.h"
#include "dummy.h"
#include "dynamic-string.h"
#include "flow.h"
#include "hmap.h"
#include "latch.h"
#include "list.h"
#include "meta-flow.h"
#include "netdev.h"
#include "netdev-dpdk.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-execute.h"
#include "odp-util.h"
#include "ofp-print.h"
#include "ofpbuf.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"
#include "seq.h"
#include "shash.h"
#include "sset.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"
#include "vlog.h"

/* Registers this file as the 'dpif_netdev' logging module. */
VLOG_DEFINE_THIS_MODULE(dpif_netdev);

/* By default, choose a priority in the middle. */
#define NETDEV_RULE_PRIORITY 0x8000

/* Thread count passed to dp_netdev_set_pmd_threads() when a PMD port is
 * added to a datapath. */
#define NR_THREADS 1

/* Configuration parameters. */
enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */

/* Queues.
 *
 * MAX_QUEUE_LEN must be a power of 2 (checked at build time below) so that
 * a free-running head/tail counter can be reduced to an array index with
 * '& QUEUE_MASK'. */
enum { MAX_QUEUE_LEN = 128 };   /* Maximum number of packets per queue. */
enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));

/* Protects against changes to 'dp_netdevs'. */
static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;

/* Contains all 'struct dp_netdev's, keyed by datapath name. */
static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
    = SHASH_INITIALIZER(&dp_netdevs);

/* One slot of a 'struct dp_netdev_queue': an upcall waiting to be handed to
 * a client, together with the ofpbuf associated with it. */
struct dp_netdev_upcall {
    struct dpif_upcall upcall;  /* Queued upcall information. */
    struct ofpbuf buf;          /* ofpbuf instance for upcall.packet. */
};

/* A queue passing packets from a struct dp_netdev to its clients (handlers).
 *
 * 'upcalls' is used as a ring: 'head' and 'tail' are free-running counters
 * that are masked with QUEUE_MASK to obtain an index, and the queue is empty
 * when 'head' equals 'tail'.
 *
 *
 * Thread-safety
 * =============
 *
 * Any access at all requires the owning 'dp_netdev''s queue_rwlock and
 * the queue's own mutex. */
struct dp_netdev_queue {
    struct ovs_mutex mutex;
    struct seq *seq;      /* Incremented whenever a packet is queued. */
    struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
    unsigned int head OVS_GUARDED;  /* Counter compared against 'tail'. */
    unsigned int tail OVS_GUARDED;  /* Counter of the next slot to drain. */
};

/* Datapath based on the network device interface from netdev.h.
 *
 *
 * Thread-safety
 * =============
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 *
 * Acquisition order is, from outermost to innermost:
 *
 *    dp_netdev_mutex (global)
 *    port_rwlock
 *    flow_mutex
 *    cls.rwlock
 *    queue_rwlock
 */
struct dp_netdev {
    const struct dpif_class *const class;
    const char *const name;     /* Heap-allocated copy; key in 'dp_netdevs'. */
    struct ovs_refcount ref_cnt; /* One reference per open dpif, plus the
                                  * initial one dropped by destroy. */
    atomic_flag destroyed;      /* Set once the datapath has been destroyed,
                                 * so the initial reference is dropped only
                                 * once. */

    /* Flows.
     *
     * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
     *
     * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
     * the 'cls->rwlock' write lock.  (The outer 'flow_mutex' allows writers to
     * atomically perform multiple operations on 'cls' and 'flow_table'.)
     */
    struct ovs_mutex flow_mutex;
    struct classifier cls;      /* Classifier.  Protected by cls.rwlock. */
    struct hmap flow_table OVS_GUARDED; /* Flow table. */

    /* Queues.
     *
     * 'queue_rwlock' protects the modification of 'handler_queues' and
     * 'n_handlers'.  The elements of each queue are protected by that
     * queue's own mutex. */
    struct fat_rwlock queue_rwlock;
    struct dp_netdev_queue *handler_queues;
    uint32_t n_handlers;        /* Number of elements in 'handler_queues'. */

    /* Statistics.
     *
     * ovsthread_stats is internally synchronized. */
    struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */

    /* Ports.
     *
     * Any lookup into 'ports' or any access to the dp_netdev_ports found
     * through 'ports' requires taking 'port_rwlock'. */
    struct ovs_rwlock port_rwlock;
    struct hmap ports OVS_GUARDED;
    struct seq *port_seq;       /* Incremented whenever a port changes. */

    /* Forwarding threads. */
    struct latch exit_latch;
    struct pmd_thread *pmd_threads; /* Array of 'n_pmd_threads' elements. */
    size_t n_pmd_threads;
    int pmd_count;              /* Incremented for each PMD port added. */
};

/* Forward declaration: returns the port numbered 'port_no' in 'dp', or NULL
 * if there is none.  The caller must hold 'dp->port_rwlock'. */
static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
                                                    odp_port_t)
    OVS_REQ_RDLOCK(dp->port_rwlock);

/* Indexes into the 'n' array of 'struct dp_netdev_stats'. */
enum dp_stat_type {
    DP_STAT_HIT,                /* Packets that matched in the flow table. */
    DP_STAT_MISS,               /* Packets that did not match. */
    DP_STAT_LOST,               /* Packets not passed up to the client. */
    DP_N_STATS                  /* Number of counters; not a counter itself. */
};

/* Contained by struct dp_netdev's 'stats' member.
 *
 * Each bucket carries its own mutex; readers sum the counters of all buckets
 * to obtain the datapath totals. */
struct dp_netdev_stats {
    struct ovs_mutex mutex;          /* Protects 'n'. */

    /* Indexed by DP_STAT_*, protected by 'mutex'. */
    unsigned long long int n[DP_N_STATS] OVS_GUARDED;
};


/* A port in a netdev-based datapath.
 *
 * The port is released by port_unref() when 'ref_cnt' drops to zero. */
struct dp_netdev_port {
    struct hmap_node node;      /* Node in dp_netdev's 'ports'. */
    odp_port_t port_no;         /* Datapath port number; hash key in 'ports'. */
    struct netdev *netdev;      /* Underlying network device. */
    struct netdev_saved_flags *sf; /* Saved flags from enabling promiscuous
                                    * mode; restored on release. */
    struct netdev_rxq **rxq;    /* One receive queue per netdev_n_rxq(). */
    struct ovs_refcount ref_cnt;
    char *type;                 /* Port type as requested by user. */
};

/* A flow in dp_netdev's 'flow_table'.
 *
 *
 * Thread-safety
 * =============
 *
 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 *
 * Motivation
 * ----------
 *
 * The thread safety rules described here for "struct dp_netdev_flow" are
 * motivated by two goals:
 *
 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 *      reading bad data due to changes by some thread concurrently modifying
 *      those members.
 *
 *    - Prevent two threads making changes to members of a given "struct
 *      dp_netdev_flow" from interfering with each other.
 *
 *
 * Rules
 * -----
 *
 * NOTE(review): this struct has no 'ref_cnt' member, and
 * dp_netdev_remove_flow() defers freeing a flow with ovsrcu_postpone().  The
 * references to 'flow->ref_cnt' in the paragraphs below therefore appear to
 * be outdated -- confirm and rewrite in terms of RCU if so.
 *
 * A flow 'flow' may be accessed without a risk of being freed by code that
 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
 * 'flow->ref_cnt' (or both).  Code that needs to hold onto a flow for a while
 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
 *
 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
 * protect members of 'flow' from modification (that's 'flow->mutex').
 *
 * 'flow->mutex' protects the members of 'flow' from modification.  It doesn't
 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
 * doesn't prevent the flow from being freed (that's 'flow->ref_cnt').
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 */
struct dp_netdev_flow {
    /* Packet classification. */
    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */

    /* Hash table index by unmasked flow. */
    const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
    const struct flow flow;      /* The flow that created this entry. */

    /* Protects members marked OVS_GUARDED.
     *
     * Acquire after datapath's flow_mutex. */
    struct ovs_mutex mutex OVS_ACQ_AFTER(dp_netdev_mutex);

    /* Statistics.
     *
     * Each bucket in 'stats' also carries its own mutex, which is what
     * get_dpif_flow_stats() takes while reading a bucket.
     *
     * Reading or writing these members requires 'mutex'. */
    struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */

    /* Actions.
     *
     * Reading 'actions' requires 'mutex'.
     * Writing 'actions' requires 'mutex' and (to allow for transactions) the
     * datapath's flow_mutex.
     *
     * NOTE(review): 'actions' is declared as an RCU-protected pointer; check
     * whether the 'mutex' requirement for readers still applies. */
    OVSRCU_TYPE(struct dp_netdev_actions *) actions;
};

/* Forward declaration: releases 'flow' and everything it owns. */
static void dp_netdev_flow_free(struct dp_netdev_flow *);

/* Contained by struct dp_netdev_flow's 'stats' member.
 *
 * Readers combine all buckets: counts are summed, 'used' is the maximum over
 * the buckets, and 'tcp_flags' values are ORed together. */
struct dp_netdev_flow_stats {
    struct ovs_mutex mutex;         /* Guards all the other members. */

    long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
    uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
};

/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
 * freed by code that holds a read-lock or write-lock on 'flow->mutex' (where
 * 'flow' is the dp_netdev_flow for which 'flow->actions == actions') or that
 * owns a reference to 'actions->ref_cnt' (or both).
 *
 * NOTE(review): this struct has no 'ref_cnt' member and 'flow->mutex' is a
 * plain mutex rather than a read/write lock, so the paragraph above appears
 * to be outdated -- confirm against the current lifetime rules. */
struct dp_netdev_actions {
    /* These members are immutable: they do not change during the struct's
     * lifetime.  */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};

/* Forward declarations for the 'struct dp_netdev_actions' helpers: creation
 * from a sequence of attributes and its length, retrieval of a flow's
 * current actions, and release. */
struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
                                                   size_t);
struct dp_netdev_actions *dp_netdev_flow_get_actions(
    const struct dp_netdev_flow *);
static void dp_netdev_actions_free(struct dp_netdev_actions *);

/* PMD: poll mode drivers.  A PMD accesses devices via polling to eliminate
 * the performance overhead of interrupt processing.  Therefore netdev cannot
 * implement rx-wait for these devices, and dpif-netdev needs to poll these
 * devices to check their receive buffers.  Each pmd-thread polls the devices
 * assigned to it.
 *
 * DPDK uses PMDs for accessing NICs.
 *
 * A thread that receives packets from PMD ports, looks them up in the flow
 * table, and executes the actions it finds.
 **/
struct pmd_thread {
    struct dp_netdev *dp;       /* Datapath this thread belongs to. */
    pthread_t thread;
    int id;
    atomic_uint change_seq;     /* Incremented by
                                 * dp_netdev_reload_pmd_threads(). */
    char *name;
};

/* Interface to netdev-based datapath.
 *
 * One of these exists per open handle; several may share one 'dp'. */
struct dpif_netdev {
    struct dpif dpif;           /* Generic dpif; must be recoverable with
                                 * CONTAINER_OF (see dpif_netdev_cast()). */
    struct dp_netdev *dp;       /* Referenced datapath. */
    uint64_t last_port_seq;     /* Value of 'dp->port_seq' last seen by this
                                 * handle. */
};

/* Forward declarations.
 *
 * The thread-safety annotations state which lock the caller must already
 * hold: the port lookup helpers need at least a read lock on
 * 'dp->port_rwlock', the port add/delete helpers need its write lock,
 * dp_netdev_free() needs the global 'dp_netdev_mutex', and
 * dp_netdev_destroy_all_queues() needs the write lock on
 * 'dp->queue_rwlock'. */
static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
                              struct dp_netdev_port **portp)
    OVS_REQ_RDLOCK(dp->port_rwlock);
static int get_port_by_name(struct dp_netdev *dp, const char *devname,
                            struct dp_netdev_port **portp)
    OVS_REQ_RDLOCK(dp->port_rwlock);
static void dp_netdev_free(struct dp_netdev *)
    OVS_REQUIRES(dp_netdev_mutex);
static void dp_netdev_flow_flush(struct dp_netdev *);
static int do_add_port(struct dp_netdev *dp, const char *devname,
                       const char *type, odp_port_t port_no)
    OVS_REQ_WRLOCK(dp->port_rwlock);
static int do_del_port(struct dp_netdev *dp, odp_port_t port_no)
    OVS_REQ_WRLOCK(dp->port_rwlock);
static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock);
static int dpif_netdev_open(const struct dpif_class *, const char *name,
                            bool create, struct dpif **);
static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
                                      int queue_no, int type,
                                      const struct flow *,
                                      const struct nlattr *userdata);
static void dp_netdev_execute_actions(struct dp_netdev *dp,
                                      const struct flow *, struct ofpbuf *, bool may_steal,
                                      struct pkt_metadata *,
                                      const struct nlattr *actions,
                                      size_t actions_len);
static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
                                 struct pkt_metadata *);

static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);

/* Converts generic 'dpif' into the 'struct dpif_netdev' that embeds it.
 * Asserts that 'dpif' was opened by this provider. */
static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
    struct dpif_netdev *netdev_dpif;

    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
    netdev_dpif = CONTAINER_OF(dpif, struct dpif_netdev, dpif);

    return netdev_dpif;
}

static struct dp_netdev *
get_dp_netdev(const struct dpif *dpif)
{
    return dpif_netdev_cast(dpif)->dp;
}

/* Adds the name of every existing datapath to 'all_dps'.  Always returns
 * 0. */
static int
dpif_netdev_enumerate(struct sset *all_dps)
{
    struct shash_node *entry;

    /* 'dp_netdevs' may only be walked while holding 'dp_netdev_mutex'. */
    ovs_mutex_lock(&dp_netdev_mutex);
    SHASH_FOR_EACH(entry, &dp_netdevs) {
        sset_add(all_dps, entry->name);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return 0;
}

/* Returns true if 'class' is any class other than 'dpif_netdev_class', that
 * is, one of the dummy classes built on this provider. */
static bool
dpif_netdev_class_is_dummy(const struct dpif_class *class)
{
    return !(class == &dpif_netdev_class);
}

/* Maps the user-visible port 'type' to the netdev type to open.  Only
 * "internal" is translated: to "dummy" for dummy classes and to "tap"
 * otherwise.  Every other type is returned unchanged. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal") != 0) {
        return type;
    }

    if (dpif_netdev_class_is_dummy(class)) {
        return "dummy";
    }
    return "tap";
}

/* Allocates and returns a new dpif handle for 'dp', taking a reference on
 * 'dp' for it.  The netflow engine type and id are the high and low bytes of
 * a 16-bit hash of the datapath name. */
static struct dpif *
create_dpif_netdev(struct dp_netdev *dp)
{
    struct dpif_netdev *handle;
    uint16_t id_hash;

    id_hash = hash_string(dp->name, 0);
    ovs_refcount_ref(&dp->ref_cnt);

    handle = xmalloc(sizeof *handle);
    dpif_init(&handle->dpif, dp->class, dp->name, id_hash >> 8, id_hash);
    handle->last_port_seq = seq_read(dp->port_seq);
    handle->dp = dp;

    return &handle->dpif;
}

/* Picks an unused, non-zero port number for a port named 'name' in 'dp' and
 * returns it.  Returns ODPP_NONE if no number is available.
 *
 * For dummy datapath classes the first number embedded in 'name' is tried
 * first (offset by 100 for names starting with "br"), which keeps port
 * numbers predictable in unit tests.  Otherwise, or if that number is
 * unusable, the lowest free number in 1...UINT16_MAX is chosen. */
static odp_port_t
choose_port(struct dp_netdev *dp, const char *name)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    uint32_t candidate;

    if (dp->class != &dpif_netdev_class) {
        /* Bridge-like names start their search at 100. */
        int base = strncmp(name, "br", 2) ? 0 : 100;
        const char *digits = name;

        /* Advance to the first decimal digit in the name, if any. */
        while (*digits != '\0' && !isdigit((unsigned char) *digits)) {
            digits++;
        }

        if (*digits != '\0') {
            candidate = base + strtol(digits, NULL, 10);
            if (candidate > 0 && candidate != odp_to_u32(ODPP_NONE)
                && !dp_netdev_lookup_port(dp, u32_to_odp(candidate))) {
                return u32_to_odp(candidate);
            }
        }
    }

    /* Fall back to the lowest port number not yet in use. */
    candidate = 1;
    while (candidate <= UINT16_MAX) {
        if (!dp_netdev_lookup_port(dp, u32_to_odp(candidate))) {
            return u32_to_odp(candidate);
        }
        candidate++;
    }

    return ODPP_NONE;
}

/* Creates a new datapath named 'name' of the given 'class', registers it in
 * 'dp_netdevs', and adds its local "internal" port.
 *
 * On success stores the datapath in '*dpp' and returns 0.  On failure frees
 * the partially constructed datapath and returns a positive errno value,
 * leaving '*dpp' untouched. */
static int
create_dp_netdev(const char *name, const struct dpif_class *class,
                 struct dp_netdev **dpp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev *new_dp = xzalloc(sizeof *new_dp);
    int err;

    shash_add(&dp_netdevs, name, new_dp);

    /* 'class' and 'name' are const members, hence the casts to set them. */
    *CONST_CAST(const struct dpif_class **, &new_dp->class) = class;
    *CONST_CAST(const char **, &new_dp->name) = xstrdup(name);
    ovs_refcount_init(&new_dp->ref_cnt);
    atomic_flag_clear(&new_dp->destroyed);

    /* Flow table. */
    ovs_mutex_init(&new_dp->flow_mutex);
    classifier_init(&new_dp->cls, NULL);
    hmap_init(&new_dp->flow_table);

    /* Upcall queues and statistics. */
    fat_rwlock_init(&new_dp->queue_rwlock);
    ovsthread_stats_init(&new_dp->stats);

    /* Ports. */
    ovs_rwlock_init(&new_dp->port_rwlock);
    hmap_init(&new_dp->ports);
    new_dp->port_seq = seq_create();
    latch_init(&new_dp->exit_latch);

    ovs_rwlock_wrlock(&new_dp->port_rwlock);
    err = do_add_port(new_dp, name, "internal", ODPP_LOCAL);
    ovs_rwlock_unlock(&new_dp->port_rwlock);

    if (!err) {
        *dpp = new_dp;
        return 0;
    }

    dp_netdev_free(new_dp);
    return err;
}

static int
dpif_netdev_open(const struct dpif_class *class, const char *name,
                 bool create, struct dpif **dpifp)
{
    struct dp_netdev *dp;
    int error;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, name);
    if (!dp) {
        error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
    } else {
        error = (dp->class != class ? EINVAL
                 : create ? EEXIST
                 : 0);
    }
    if (!error) {
        *dpifp = create_dpif_netdev(dp);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return error;
}

/* Discards every upcall still pending in each of 'dp''s handler queues,
 * releasing the buffers they hold. */
static void
dp_netdev_purge_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    int idx;

    for (idx = 0; idx < dp->n_handlers; idx++) {
        struct dp_netdev_queue *queue = &dp->handler_queues[idx];

        ovs_mutex_lock(&queue->mutex);
        /* Drain from 'tail' until it catches up with 'head'. */
        for (; queue->tail != queue->head; queue->tail++) {
            struct dp_netdev_upcall *pending;

            pending = &queue->upcalls[queue->tail & QUEUE_MASK];
            ofpbuf_uninit(&pending->upcall.packet);
            ofpbuf_uninit(&pending->buf);
        }
        ovs_mutex_unlock(&queue->mutex);
    }
}

/* Tears down 'dp' and frees it.
 *
 * Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 * through the 'dp_netdevs' shash while freeing 'dp'. */
static void
dp_netdev_free(struct dp_netdev *dp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev_port *port, *next;
    struct dp_netdev_stats *bucket;
    int i;

    /* Unregister first so the datapath can no longer be found by name. */
    shash_find_and_delete(&dp_netdevs, dp->name);

    /* Request zero PMD threads before releasing the thread array. */
    dp_netdev_set_pmd_threads(dp, 0);
    free(dp->pmd_threads);

    /* Remove all flows, then all ports. */
    dp_netdev_flow_flush(dp);
    ovs_rwlock_wrlock(&dp->port_rwlock);
    HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) {
        do_del_port(dp, port->port_no);
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    /* Release each per-thread statistics bucket, then the container. */
    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
        ovs_mutex_destroy(&bucket->mutex);
        free_cacheline(bucket);
    }
    ovsthread_stats_destroy(&dp->stats);

    /* Destroy the upcall queues under the write lock, then the lock. */
    fat_rwlock_wrlock(&dp->queue_rwlock);
    dp_netdev_destroy_all_queues(dp);
    fat_rwlock_unlock(&dp->queue_rwlock);

    fat_rwlock_destroy(&dp->queue_rwlock);

    /* Finally release the remaining containers and the datapath itself. */
    classifier_destroy(&dp->cls);
    hmap_destroy(&dp->flow_table);
    ovs_mutex_destroy(&dp->flow_mutex);
    seq_destroy(dp->port_seq);
    hmap_destroy(&dp->ports);
    latch_destroy(&dp->exit_latch);
    free(CONST_CAST(char *, dp->name));
    free(dp);
}

/* Drops one reference to 'dp', freeing it if that was the last one.  A null
 * 'dp' is ignored. */
static void
dp_netdev_unref(struct dp_netdev *dp)
{
    if (!dp) {
        return;
    }

    /* Hold dp_netdev_mutex across the decrement so that nobody can obtain a
     * fresh reference through the 'dp_netdevs' shash once the count reaches
     * zero. */
    ovs_mutex_lock(&dp_netdev_mutex);
    if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
        dp_netdev_free(dp);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);
}

/* Closes 'dpif': releases its reference to the datapath and frees the
 * handle. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    dp_netdev_unref(get_dp_netdev(dpif));
    free(dpif);
}

/* Marks the datapath behind 'dpif' as destroyed and, the first time this
 * happens, drops one reference to it.  The datapath is actually freed when
 * the remaining references are released.  Always returns 0. */
static int
dpif_netdev_destroy(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    if (atomic_flag_test_and_set(&dp->destroyed)) {
        /* Already destroyed earlier: nothing more to drop. */
        return 0;
    }

    if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
        /* Can't happen: 'dpif' still owns a reference to 'dp'. */
        OVS_NOT_REACHED();
    }

    return 0;
}

/* Fills 'stats' with the flow count and the hit/miss/lost counters of the
 * datapath behind 'dpif', summed over all statistics buckets.  The mask
 * statistics are set to their all-ones values.  Always returns 0. */
static int
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_stats *per_thread;
    size_t idx;

    stats->n_masks = UINT32_MAX;
    stats->n_mask_hit = UINT64_MAX;

    fat_rwlock_rdlock(&dp->cls.rwlock);
    stats->n_flows = hmap_count(&dp->flow_table);
    fat_rwlock_unlock(&dp->cls.rwlock);

    stats->n_hit = 0;
    stats->n_missed = 0;
    stats->n_lost = 0;
    OVSTHREAD_STATS_FOR_EACH_BUCKET (per_thread, idx, &dp->stats) {
        ovs_mutex_lock(&per_thread->mutex);
        stats->n_hit += per_thread->n[DP_STAT_HIT];
        stats->n_missed += per_thread->n[DP_STAT_MISS];
        stats->n_lost += per_thread->n[DP_STAT_LOST];
        ovs_mutex_unlock(&per_thread->mutex);
    }

    return 0;
}

/* Bumps the 'change_seq' counter of every PMD thread of 'dp'. */
static void
dp_netdev_reload_pmd_threads(struct dp_netdev *dp)
{
    int idx;

    for (idx = 0; idx < dp->n_pmd_threads; idx++) {
        struct pmd_thread *pmd = &dp->pmd_threads[idx];
        int previous;       /* Receives the old value; not used. */

        atomic_add(&pmd->change_seq, 1, &previous);
    }
}

/* Opens network device 'devname' and adds it to 'dp' as port 'port_no', with
 * 'type' recorded as the user-requested port type.
 *
 * The device is opened with the type chosen by dpif_netdev_port_open_type(),
 * every receive queue reported by netdev_n_rxq() is opened, and promiscuous
 * mode is turned on.  Loopback devices are rejected with EINVAL.  A failure
 * to open a receive queue is fatal, except that EOPNOTSUPP is tolerated for
 * dummy datapath classes.
 *
 * Returns 0 on success, otherwise a positive errno value.  On failure
 * everything acquired so far (receive queues, the netdev reference, and the
 * port structure with its 'rxq' array and 'type' string) is released and the
 * port is not added to 'dp'. */
static int
do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
            odp_port_t port_no)
    OVS_REQ_WRLOCK(dp->port_rwlock)
{
    struct netdev_saved_flags *sf;
    struct dp_netdev_port *port;
    struct netdev *netdev;
    enum netdev_flags flags;
    const char *open_type;
    int n_opened = 0;
    int error;
    int i;

    /* XXX reject devices already in some dp_netdev. */

    /* Open and validate network device. */
    open_type = dpif_netdev_port_open_type(dp->class, type);
    error = netdev_open(devname, open_type, &netdev);
    if (error) {
        return error;
    }
    /* XXX reject non-Ethernet devices */

    netdev_get_flags(netdev, &flags);
    if (flags & NETDEV_LOOPBACK) {
        VLOG_ERR("%s: cannot add a loopback device", devname);
        netdev_close(netdev);
        return EINVAL;
    }

    port = xzalloc(sizeof *port);
    port->port_no = port_no;
    port->netdev = netdev;
    port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
    port->type = xstrdup(type);
    for (i = 0; i < netdev_n_rxq(netdev); i++) {
        error = netdev_rxq_open(netdev, &port->rxq[i], i);
        if (error
            && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
            /* Report the error returned by the call, not a possibly stale
             * 'errno'. */
            VLOG_ERR("%s: cannot receive packets on this network device (%s)",
                     devname, ovs_strerror(error));
            goto error;
        }
        /* Slot 'i' now needs netdev_rxq_close() on any later failure. */
        n_opened++;
    }

    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
    if (error) {
        goto error;
    }
    port->sf = sf;

    if (netdev_is_pmd(netdev)) {
        dp->pmd_count++;
        dp_netdev_set_pmd_threads(dp, NR_THREADS);
        dp_netdev_reload_pmd_threads(dp);
    }
    ovs_refcount_init(&port->ref_cnt);

    hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
    seq_change(dp->port_seq);

    return 0;

error:
    /* Undo everything acquired above, receive queues first. */
    for (i = 0; i < n_opened; i++) {
        netdev_rxq_close(port->rxq[i]);
    }
    netdev_close(netdev);
    free(port->type);
    free(port->rxq);
    free(port);
    return error;
}

/* Adds 'netdev' as a port of the datapath behind 'dpif'.
 *
 * If '*port_nop' is ODPP_NONE a free port number is chosen; otherwise the
 * requested number is used.  Whenever a usable number is found it is stored
 * back into '*port_nop' before the port is added.
 *
 * Returns 0 on success, EBUSY if the requested number is taken, EFBIG if no
 * number is free, or the error from adding the port. */
static int
dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
                     odp_port_t *port_nop)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *port_name;
    odp_port_t chosen;
    int error = 0;

    ovs_rwlock_wrlock(&dp->port_rwlock);

    port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (*port_nop == ODPP_NONE) {
        chosen = choose_port(dp, port_name);
        if (chosen == ODPP_NONE) {
            error = EFBIG;
        }
    } else {
        chosen = *port_nop;
        if (dp_netdev_lookup_port(dp, *port_nop)) {
            error = EBUSY;
        }
    }

    if (!error) {
        *port_nop = chosen;
        error = do_add_port(dp, port_name, netdev_get_type(netdev), chosen);
    }

    ovs_rwlock_unlock(&dp->port_rwlock);

    return error;
}

/* Removes port 'port_no' from the datapath behind 'dpif'.  The local port
 * (ODPP_LOCAL) cannot be removed; asking for that returns EINVAL.  Otherwise
 * returns the result of do_del_port(). */
static int
dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    int error;

    ovs_rwlock_wrlock(&dp->port_rwlock);
    if (port_no == ODPP_LOCAL) {
        error = EINVAL;
    } else {
        error = do_del_port(dp, port_no);
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    return error;
}

/* Returns true for every port number except the ODPP_NONE sentinel. */
static bool
is_valid_port_number(odp_port_t port_no)
{
    return !(port_no == ODPP_NONE);
}

/* Searches 'dp''s port table for the port numbered 'port_no'.  Returns the
 * port, or NULL if there is no such port. */
static struct dp_netdev_port *
dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    uint32_t hash = hash_int(odp_to_u32(port_no), 0);
    struct dp_netdev_port *candidate;

    HMAP_FOR_EACH_IN_BUCKET (candidate, node, hash, &dp->ports) {
        if (candidate->port_no == port_no) {
            return candidate;
        }
    }

    return NULL;
}

/* Looks up port 'port_no' in 'dp' and stores it in '*portp'.
 *
 * Returns 0 on success, EINVAL if 'port_no' is not a valid port number, or
 * ENOENT if no such port exists.  '*portp' is set to NULL on failure. */
static int
get_port_by_number(struct dp_netdev *dp,
                   odp_port_t port_no, struct dp_netdev_port **portp)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    struct dp_netdev_port *found;

    if (!is_valid_port_number(port_no)) {
        *portp = NULL;
        return EINVAL;
    }

    found = dp_netdev_lookup_port(dp, port_no);
    *portp = found;
    if (!found) {
        return ENOENT;
    }
    return 0;
}

/* Takes an additional reference on 'port'.  A null 'port' is ignored. */
static void
port_ref(struct dp_netdev_port *port)
{
    if (!port) {
        return;
    }
    ovs_refcount_ref(&port->ref_cnt);
}

/* Drops one reference to 'port'.  When the last reference goes away, closes
 * the port's network device and receive queues, restores the device flags
 * saved when the port was added, and frees the port together with its 'rxq'
 * array and 'type' string.  A null 'port' is ignored. */
static void
port_unref(struct dp_netdev_port *port)
{
    if (port && ovs_refcount_unref(&port->ref_cnt) == 1) {
        /* Read the queue count while this port still holds its own
         * reference to the netdev.  'port->netdev' must not be consulted
         * after netdev_close() and the queue closes below. */
        int n_rxq = netdev_n_rxq(port->netdev);
        int i;

        netdev_close(port->netdev);
        netdev_restore_flags(port->sf);

        for (i = 0; i < n_rxq; i++) {
            netdev_rxq_close(port->rxq[i]);
        }
        /* The array of queue pointers was allocated by do_add_port(). */
        free(port->rxq);
        free(port->type);
        free(port);
    }
}

/* Looks up the port of 'dp' whose network device is named 'devname' and
 * stores it in '*portp'.
 *
 * Returns 0 on success or ENOENT if no port has that name.  '*portp' is set
 * to NULL on failure, matching get_port_by_number(), so that a caller that
 * forgets to check the return value never sees an indeterminate pointer. */
static int
get_port_by_name(struct dp_netdev *dp,
                 const char *devname, struct dp_netdev_port **portp)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    struct dp_netdev_port *port;

    /* Ports are hashed by number, so a name lookup walks the whole table. */
    HMAP_FOR_EACH (port, node, &dp->ports) {
        if (!strcmp(netdev_get_name(port->netdev), devname)) {
            *portp = port;
            return 0;
        }
    }

    *portp = NULL;
    return ENOENT;
}

/* Removes port 'port_no' from 'dp' and drops the datapath's reference to it.
 * PMD threads are told to reload if the port's device is a PMD device.
 *
 * Returns 0 on success or the error from get_port_by_number() if the port
 * cannot be found. */
static int
do_del_port(struct dp_netdev *dp, odp_port_t port_no)
    OVS_REQ_WRLOCK(dp->port_rwlock)
{
    struct dp_netdev_port *victim;
    int error = get_port_by_number(dp, port_no, &victim);

    if (error) {
        return error;
    }

    hmap_remove(&dp->ports, &victim->node);
    seq_change(dp->port_seq);

    if (netdev_is_pmd(victim->netdev)) {
        dp_netdev_reload_pmd_threads(dp);
    }
    port_unref(victim);

    return 0;
}

/* Fills 'dpif_port' with a description of 'port'.  The name and type are
 * fresh heap copies. */
static void
answer_port_query(const struct dp_netdev_port *port,
                  struct dpif_port *dpif_port)
{
    const char *netdev_name = netdev_get_name(port->netdev);

    dpif_port->port_no = port->port_no;
    dpif_port->name = xstrdup(netdev_name);
    dpif_port->type = xstrdup(port->type);
}

/* Looks up port 'port_no' in the datapath behind 'dpif'.  If it exists and
 * 'dpif_port' is nonnull, fills in 'dpif_port'.  Returns 0 if the port
 * exists, otherwise the error from get_port_by_number(). */
static int
dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
                                 struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *found;
    int error;

    ovs_rwlock_rdlock(&dp->port_rwlock);
    error = get_port_by_number(dp, port_no, &found);
    if (!error) {
        if (dpif_port) {
            answer_port_query(found, dpif_port);
        }
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    return error;
}

static int
dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
                               struct dpif_port *dpif_port)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;
    int error;

    ovs_rwlock_rdlock(&dp->port_rwlock);
    error = get_port_by_name(dp, devname, &port);
    if (!error && dpif_port) {
        answer_port_query(port, dpif_port);
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    return error;
}

/* Releases 'flow' and everything it owns: its statistics buckets, its
 * classifier rule, its actions, and its mutex. */
static void
dp_netdev_flow_free(struct dp_netdev_flow *flow)
{
    struct dp_netdev_flow_stats *per_thread;
    size_t idx;

    /* Statistics buckets first, then their container. */
    OVSTHREAD_STATS_FOR_EACH_BUCKET (per_thread, idx, &flow->stats) {
        ovs_mutex_destroy(&per_thread->mutex);
        free_cacheline(per_thread);
    }
    ovsthread_stats_destroy(&flow->stats);

    /* 'cr' is a const member, so cast the qualifier away to destroy it. */
    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));

    dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));

    ovs_mutex_destroy(&flow->mutex);
    free(flow);
}

/* Unlinks 'flow' from 'dp''s classifier and flow table and schedules it to
 * be freed through ovsrcu_postpone(). */
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQ_WRLOCK(dp->cls.rwlock)
    OVS_REQUIRES(dp->flow_mutex)
{
    /* Both members are declared const in the flow, hence the casts. */
    classifier_remove(&dp->cls, CONST_CAST(struct cls_rule *, &flow->cr));
    hmap_remove(&dp->flow_table,
                CONST_CAST(struct hmap_node *, &flow->node));

    ovsrcu_postpone(dp_netdev_flow_free, flow);
}

/* Removes every flow from 'dp'.  Takes 'flow_mutex' and then the classifier
 * write lock for the duration. */
static void
dp_netdev_flow_flush(struct dp_netdev *dp)
{
    struct dp_netdev_flow *cur, *following;

    ovs_mutex_lock(&dp->flow_mutex);
    fat_rwlock_wrlock(&dp->cls.rwlock);

    /* The _SAFE iterator is needed because each flow is unlinked as it is
     * visited. */
    HMAP_FOR_EACH_SAFE (cur, following, node, &dp->flow_table) {
        dp_netdev_remove_flow(dp, cur);
    }

    fat_rwlock_unlock(&dp->cls.rwlock);
    ovs_mutex_unlock(&dp->flow_mutex);
}

/* Removes every flow from the datapath behind 'dpif'.  Always returns 0. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));

    return 0;
}

/* Iteration state for a port dump.
 *
 * 'bucket' and 'offset' record the position in the datapath's 'ports' hmap.
 * 'name' and 'type' are heap copies of the strings most recently handed to
 * the caller; they are owned by the dump state and stay valid until the next
 * call to dpif_netdev_port_dump_next() or dpif_netdev_port_dump_done(). */
struct dp_netdev_port_state {
    uint32_t bucket;
    uint32_t offset;
    char *name;
    char *type;
};

/* Starts a port dump by allocating zeroed iteration state in '*statep'.
 * Always returns 0. */
static int
dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    *statep = xzalloc(sizeof(struct dp_netdev_port_state));
    return 0;
}

/* Stores the next port of the datapath behind 'dpif' in 'dpif_port' and
 * advances 'state_'.  Returns 0 if a port was stored, or EOF when there are
 * no more ports.
 *
 * The strings stored in 'dpif_port' belong to the dump state, not to the
 * port.  The port is only protected by 'port_rwlock', which is released
 * before returning, so pointing the caller at 'port->type' directly would
 * leave it with a dangling pointer if the port were deleted afterward. */
static int
dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
                           struct dpif_port *dpif_port)
{
    struct dp_netdev_port_state *state = state_;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct hmap_node *node;
    int retval;

    ovs_rwlock_rdlock(&dp->port_rwlock);
    node = hmap_at_position(&dp->ports, &state->bucket, &state->offset);
    if (node) {
        struct dp_netdev_port *port;

        port = CONTAINER_OF(node, struct dp_netdev_port, node);

        /* Replace the copies handed out by the previous call. */
        free(state->name);
        state->name = xstrdup(netdev_get_name(port->netdev));
        free(state->type);
        state->type = xstrdup(port->type);

        dpif_port->name = state->name;
        dpif_port->type = state->type;
        dpif_port->port_no = port->port_no;

        retval = 0;
    } else {
        retval = EOF;
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    return retval;
}

/* Finishes a port dump, releasing 'state_' and the strings it owns.  Always
 * returns 0. */
static int
dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dp_netdev_port_state *state = state_;

    free(state->name);
    free(state->type);
    free(state);
    return 0;
}

/* Reports whether the port set of the datapath behind 'dpif_' has changed
 * since this handle last checked.
 *
 * Returns ENOBUFS if the port sequence number moved (and records the new
 * value), or EAGAIN if nothing changed.  'devnamep' is not used. */
static int
dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
    uint64_t current_seq = seq_read(dpif->dp->port_seq);

    if (dpif->last_port_seq == current_seq) {
        return EAGAIN;
    }

    dpif->last_port_seq = current_seq;
    return ENOBUFS;
}

static void
dpif_netdev_port_poll_wait(const struct dpif *dpif_)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);

    seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
}

/* Converts classifier rule 'cr' into the flow that embeds it.  A null 'cr'
 * yields NULL. */
static struct dp_netdev_flow *
dp_netdev_flow_cast(const struct cls_rule *cr)
{
    if (!cr) {
        return NULL;
    }
    return CONTAINER_OF(cr, struct dp_netdev_flow, cr);
}

/* Classifies 'flow' against 'dp''s classifier and returns the matching
 * datapath flow, or NULL if nothing matches.  The classifier read lock is
 * taken for the lookup and released before returning. */
static struct dp_netdev_flow *
dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow)
    OVS_EXCLUDED(dp->cls.rwlock)
{
    struct dp_netdev_flow *match;

    fat_rwlock_rdlock(&dp->cls.rwlock);
    match = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
    fat_rwlock_unlock(&dp->cls.rwlock);

    return match;
}

/* Searches 'dp''s flow table for an entry whose stored flow is exactly equal
 * to 'flow'.  Returns the entry, or NULL if there is none. */
static struct dp_netdev_flow *
dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
    OVS_REQ_RDLOCK(dp->cls.rwlock)
{
    uint32_t hash = flow_hash(flow, 0);
    struct dp_netdev_flow *candidate;

    HMAP_FOR_EACH_WITH_HASH (candidate, node, hash, &dp->flow_table) {
        if (flow_equal(&candidate->flow, flow)) {
            return candidate;
        }
    }

    return NULL;
}

/* Aggregates the statistics buckets of 'netdev_flow' into 'stats': packet
 * and byte counts are summed, 'used' is the latest time over all buckets,
 * and TCP flags are ORed together. */
static void
get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
                    struct dpif_flow_stats *stats)
{
    struct dp_netdev_flow_stats *per_thread;
    size_t idx;

    memset(stats, 0, sizeof *stats);

    OVSTHREAD_STATS_FOR_EACH_BUCKET (per_thread, idx, &netdev_flow->stats) {
        ovs_mutex_lock(&per_thread->mutex);
        stats->n_bytes += per_thread->byte_count;
        stats->n_packets += per_thread->packet_count;
        stats->tcp_flags |= per_thread->tcp_flags;
        stats->used = MAX(stats->used, per_thread->used);
        ovs_mutex_unlock(&per_thread->mutex);
    }
}

/* Builds '*mask' for a flow whose key is 'flow'.
 *
 * With no mask attributes ('mask_key_len' is 0), '*mask' is zeroed and then
 * every field except the registers and metadata is masked with
 * mf_mask_field(), provided its prerequisites hold for 'flow'.
 *
 * Otherwise the 'mask_key_len' bytes at 'mask_key' are parsed with
 * odp_flow_key_to_mask() and the in_port mask is forced to all-ones.  'key'
 * and 'key_len' are used only to format the error log message.
 *
 * Returns 0 on success, EINVAL if parsing reports a nonzero fitness. */
static int
dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              const struct nlattr *mask_key,
                              uint32_t mask_key_len, const struct flow *flow,
                              struct flow *mask)
{
    enum odp_key_fitness fitness;

    if (!mask_key_len) {
        enum mf_field_id id;

        /* No mask key: unwildcard everything except fields whose
         * prerequisites are not met. */
        memset(mask, 0x0, sizeof *mask);

        for (id = 0; id < MFF_N_IDS; ++id) {
            const struct mf_field *mf;

            /* Registers and metadata stay wildcarded. */
            if ((id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
                || id == MFF_METADATA) {
                continue;
            }

            mf = mf_from_id(id);
            if (mf_are_prereqs_ok(mf, flow)) {
                mf_mask_field(mf, mask);
            }
        }

        return 0;
    }

    fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
    if (fitness) {
        /* This should not happen: it indicates that
         * odp_flow_key_from_mask() and odp_flow_key_to_mask()
         * disagree on the acceptable form of a mask.  Log the problem
         * as an error, with enough details to enable debugging. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
                            true);
            VLOG_ERR("internal error parsing flow mask %s (%s)",
                     ds_cstr(&s), odp_key_fitness_to_string(fitness));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    /* Force unwildcard the in_port. */
    mask->in_port.odp_port = u32_to_odp(UINT32_MAX);

    return 0;
}

/* Parses the 'key_len' bytes of Netlink attributes at 'key' into '*flow'.
 *
 * Returns 0 on success.  Returns EINVAL if odp_flow_key_to_flow() reports a
 * nonzero result (logged, rate-limited) or if the parsed input port is neither
 * a valid port number nor ODPP_NONE. */
static int
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
                              struct flow *flow)
{
    odp_port_t in_port;

    if (odp_flow_key_to_flow(key, key_len, flow) != 0) {
        /* This should not happen: it indicates that odp_flow_key_from_flow()
         * and odp_flow_key_to_flow() disagree on the acceptable form of a
         * flow.  Log the problem as an error, with enough details to enable
         * debugging. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

        if (!VLOG_DROP_ERR(&rl)) {
            struct ds s;

            ds_init(&s);
            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
            ds_destroy(&s);
        }

        return EINVAL;
    }

    in_port = flow->in_port.odp_port;
    if (is_valid_port_number(in_port) || in_port == ODPP_NONE) {
        return 0;
    }

    return EINVAL;
}

/* Looks up the flow whose key is the 'nl_key_len' bytes of Netlink attributes
 * at 'nl_key'.
 *
 * On success returns 0.  If 'stats' is nonnull it receives the flow's
 * statistics; if 'actionsp' is nonnull, '*actionsp' receives a newly
 * allocated ofpbuf holding a copy of the flow's actions.
 *
 * Returns the key-parsing error if the key cannot be parsed, or ENOENT if no
 * flow with exactly that key exists. */
static int
dpif_netdev_flow_get(const struct dpif *dpif,
                     const struct nlattr *nl_key, size_t nl_key_len,
                     struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *found;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
    if (error) {
        return error;
    }

    fat_rwlock_rdlock(&dp->cls.rwlock);
    found = dp_netdev_find_flow(dp, &key);
    fat_rwlock_unlock(&dp->cls.rwlock);

    if (!found) {
        return ENOENT;
    }

    if (stats) {
        get_dpif_flow_stats(found, stats);
    }

    if (actionsp) {
        struct dp_netdev_actions *actions = dp_netdev_flow_get_actions(found);

        *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
    }

    return 0;
}

/* Creates a new flow in 'dp' that matches 'flow' under wildcards 'wc' and
 * carries a copy of the 'actions_len' bytes of 'actions'.  The flow is
 * inserted into both the classifier and the flow hash table while holding the
 * classifier rwlock for writing.
 *
 * The caller must hold 'dp->flow_mutex'.  Always returns 0. */
static int
dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
                   const struct flow_wildcards *wc,
                   const struct nlattr *actions,
                   size_t actions_len)
    OVS_REQUIRES(dp->flow_mutex)
{
    struct dp_netdev_flow *new_flow = xzalloc(sizeof *new_flow);
    struct cls_rule *rule = CONST_CAST(struct cls_rule *, &new_flow->cr);
    struct hmap_node *node = CONST_CAST(struct hmap_node *, &new_flow->node);
    struct match match;

    /* Populate the members that are written only once, at creation. */
    *CONST_CAST(struct flow *, &new_flow->flow) = *flow;
    ovs_mutex_init(&new_flow->mutex);
    ovsthread_stats_init(&new_flow->stats);
    ovsrcu_set(&new_flow->actions,
               dp_netdev_actions_create(actions, actions_len));

    match_init(&match, flow, wc);
    cls_rule_init(rule, &match, NETDEV_RULE_PRIORITY);

    /* Publish the flow in both lookup structures under one write lock. */
    fat_rwlock_wrlock(&dp->cls.rwlock);
    classifier_insert(&dp->cls, rule);
    hmap_insert(&dp->flow_table, node, flow_hash(flow, 0));
    fat_rwlock_unlock(&dp->cls.rwlock);

    return 0;
}

/* Zeroes the packet count, byte count, last-used time, and TCP flags in every
 * statistics bucket of 'netdev_flow', taking each bucket's mutex in turn. */
static void
clear_stats(struct dp_netdev_flow *netdev_flow)
{
    struct dp_netdev_flow_stats *b;
    size_t idx;

    OVSTHREAD_STATS_FOR_EACH_BUCKET (b, idx, &netdev_flow->stats) {
        ovs_mutex_lock(&b->mutex);
        b->packet_count = 0;
        b->byte_count = 0;
        b->tcp_flags = 0;
        b->used = 0;
        ovs_mutex_unlock(&b->mutex);
    }
}

/* Creates or modifies a flow according to 'put'.
 *
 * The key and mask are parsed from 'put->key' and 'put->mask'; a parse error
 * is returned unchanged.  Then, under 'dp->flow_mutex':
 *
 *   - If the classifier has no flow matching the key: with DPIF_FP_CREATE a
 *     new flow is added (EFBIG if the table already holds MAX_FLOWS flows);
 *     without it the result is ENOENT.
 *
 *   - If a flow matches: with DPIF_FP_MODIFY and an identical key, its
 *     actions are replaced; otherwise the result is EEXIST when
 *     DPIF_FP_CREATE is set and EINVAL (overlapping flow) when it is not.
 *
 * If 'put->stats' is nonnull it is zeroed for a new flow, or filled with the
 * existing statistics for a modified one. */
static int
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    struct flow flow;
    struct flow_wildcards wc;
    int error;

    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
    if (error) {
        return error;
    }
    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
                                          put->mask, put->mask_len,
                                          &flow, &wc.masks);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    /* Classifier lookup, so this can return a flow whose key differs from
     * 'flow'; the flow_equal() check below tells the two cases apart. */
    netdev_flow = dp_netdev_lookup_flow(dp, &flow);
    if (!netdev_flow) {
        if (put->flags & DPIF_FP_CREATE) {
            if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
                if (put->stats) {
                    memset(put->stats, 0, sizeof *put->stats);
                }
                error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
                                           put->actions_len);
            } else {
                error = EFBIG;
            }
        } else {
            error = ENOENT;
        }
    } else {
        if (put->flags & DPIF_FP_MODIFY
            && flow_equal(&flow, &netdev_flow->flow)) {
            struct dp_netdev_actions *new_actions;
            struct dp_netdev_actions *old_actions;

            new_actions = dp_netdev_actions_create(put->actions,
                                                   put->actions_len);

            /* Swap in the new actions first; the old ones are freed only
             * through ovsrcu_postpone() at the end of this branch. */
            old_actions = dp_netdev_flow_get_actions(netdev_flow);
            ovsrcu_set(&netdev_flow->actions, new_actions);

            /* Report statistics before optionally clearing them. */
            if (put->stats) {
                get_dpif_flow_stats(netdev_flow, put->stats);
            }
            if (put->flags & DPIF_FP_ZERO_STATS) {
                clear_stats(netdev_flow);
            }

            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
        } else if (put->flags & DPIF_FP_CREATE) {
            error = EEXIST;
        } else {
            /* Overlapping flow. */
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}

/* Deletes the flow whose key is given by 'del->key'.
 *
 * If the flow exists and 'del->stats' is nonnull, the flow's statistics are
 * stored there before it is removed.  Returns 0 on success, the key-parsing
 * error if the key cannot be parsed, or ENOENT if no flow has that key. */
static int
dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *victim;
    struct flow key;
    int error;

    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
    if (error) {
        return error;
    }

    ovs_mutex_lock(&dp->flow_mutex);
    fat_rwlock_wrlock(&dp->cls.rwlock);

    victim = dp_netdev_find_flow(dp, &key);
    if (!victim) {
        error = ENOENT;
    } else {
        if (del->stats) {
            get_dpif_flow_stats(victim, del->stats);
        }
        dp_netdev_remove_flow(dp, victim);
    }

    fat_rwlock_unlock(&dp->cls.rwlock);
    ovs_mutex_unlock(&dp->flow_mutex);

    return error;
}

/* Scratch state for a flow dump, allocated by
 * dpif_netdev_flow_dump_state_init() and filled in by
 * dpif_netdev_flow_dump_next().  The pointers that dump_next hands back to
 * its caller point into this structure. */
struct dp_netdev_flow_state {
    struct dp_netdev_actions *actions; /* Actions last returned, or NULL. */
    struct odputil_keybuf keybuf;      /* Backing storage for the key. */
    struct odputil_keybuf maskbuf;     /* Backing storage for the mask. */
    struct dpif_flow_stats stats;      /* Statistics last returned. */
};

/* Position of a flow dump within the datapath's flow hash table, created by
 * dpif_netdev_flow_dump_start() and advanced by
 * dpif_netdev_flow_dump_next(). */
struct dp_netdev_flow_iter {
    uint32_t bucket;        /* Passed to hmap_at_position() as bucket. */
    uint32_t offset;        /* Passed to hmap_at_position() as offset. */
    int status;             /* 0 while dumping; set to EOF once exhausted. */
    struct ovs_mutex mutex; /* Held while reading or advancing the above. */
};

/* Allocates a flow dump state object, with its 'actions' pointer cleared, and
 * stores it in '*statep'. */
static void
dpif_netdev_flow_dump_state_init(void **statep)
{
    struct dp_netdev_flow_state *fresh = xmalloc(sizeof *fresh);

    fresh->actions = NULL;
    *statep = fresh;
}

/* Frees the flow dump state object 'state_'. */
static void
dpif_netdev_flow_dump_state_uninit(void *state_)
{
    free(state_);
}

/* Allocates a flow dump iterator positioned at the start of the flow table
 * (bucket 0, offset 0, status 0), initializes its mutex, and stores it in
 * '*iterp'.  Always returns 0. */
static int
dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp)
{
    struct dp_netdev_flow_iter *cursor = xmalloc(sizeof *cursor);

    cursor->status = 0;
    cursor->offset = 0;
    cursor->bucket = 0;
    ovs_mutex_init(&cursor->mutex);

    *iterp = cursor;
    return 0;
}

/* Advances the flow dump 'iter_' by one flow and reports that flow through
 * whichever output arguments are nonnull.
 *
 * Returns 0 if a flow was produced.  Once the table is exhausted, EOF is
 * stored in the iterator and returned on this and every later call.
 *
 * The key, mask, and statistics returned point into 'state_'.  '*mask' is
 * produced only when 'key' is also requested.  '*actions' points directly at
 * the flow's current actions.
 *
 * XXX the caller must use 'actions' without quiescing */
static int
dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_,
                           const struct nlattr **key, size_t *key_len,
                           const struct nlattr **mask, size_t *mask_len,
                           const struct nlattr **actions, size_t *actions_len,
                           const struct dpif_flow_stats **stats)
{
    struct dp_netdev_flow_iter *iter = iter_;
    struct dp_netdev_flow_state *state = state_;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_flow *netdev_flow;
    int error;

    /* The iterator mutex serializes position updates. */
    ovs_mutex_lock(&iter->mutex);
    error = iter->status;
    if (!error) {
        struct hmap_node *node;

        fat_rwlock_rdlock(&dp->cls.rwlock);
        node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset);
        if (node) {
            netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
        }
        fat_rwlock_unlock(&dp->cls.rwlock);
        if (!node) {
            iter->status = error = EOF;
        }
    }
    ovs_mutex_unlock(&iter->mutex);
    /* 'netdev_flow' is assigned on every path that reaches past this check. */
    if (error) {
        return error;
    }

    if (key) {
        struct ofpbuf buf;

        ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
        odp_flow_key_from_flow(&buf, &netdev_flow->flow,
                               netdev_flow->flow.in_port.odp_port);

        *key = buf.data;
        *key_len = buf.size;
    }

    if (key && mask) {
        struct ofpbuf buf;
        struct flow_wildcards wc;

        ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
        minimask_expand(&netdev_flow->cr.match.mask, &wc);
        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
                               odp_to_u32(wc.masks.in_port.odp_port),
                               SIZE_MAX);

        *mask = buf.data;
        *mask_len = buf.size;
    }

    if (actions || stats) {
        state->actions = NULL;

        if (actions) {
            state->actions = dp_netdev_flow_get_actions(netdev_flow);
            *actions = state->actions->actions;
            *actions_len = state->actions->size;
        }

        if (stats) {
            get_dpif_flow_stats(netdev_flow, &state->stats);
            *stats = &state->stats;
        }
    }

    return 0;
}

/* Destroys the mutex of flow dump iterator 'iter_' and frees the iterator.
 * Always returns 0. */
static int
dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_)
{
    struct dp_netdev_flow_iter *cursor = iter_;

    ovs_mutex_destroy(&cursor->mutex);
    free(cursor);

    return 0;
}

static int
dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct pkt_metadata *md = &execute->md;
    struct flow key;

    if (execute->packet->size < ETH_HEADER_LEN ||
        execute->packet->size > UINT16_MAX) {
        return EINVAL;
    }

    /* Extract flow key. */
    flow_extract(execute->packet, md, &key);

    ovs_rwlock_rdlock(&dp->port_rwlock);
    dp_netdev_execute_actions(dp, &key, execute->packet, false, md,
                              execute->actions, execute->actions_len);
    ovs_rwlock_unlock(&dp->port_rwlock);

    return 0;
}

/* Purges and then tears down every handler queue of 'dp': each queue's mutex
 * and seq are destroyed, the queue array is freed, and 'dp' is left with no
 * queues ('handler_queues' NULL, 'n_handlers' 0).
 *
 * The caller must hold 'dp->queue_rwlock' for writing. */
static void
dp_netdev_destroy_all_queues(struct dp_netdev *dp)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    size_t idx = 0;

    dp_netdev_purge_queues(dp);

    while (idx < dp->n_handlers) {
        struct dp_netdev_queue *queue = &dp->handler_queues[idx++];

        ovs_mutex_destroy(&queue->mutex);
        seq_destroy(queue->seq);
    }

    free(dp->handler_queues);
    dp->handler_queues = NULL;
    dp->n_handlers = 0;
}

/* Makes 'dp' have exactly 'n_handlers' handler queues.  Does nothing if that
 * is already the case; otherwise all existing queues are destroyed and a
 * fresh, zeroed array is allocated, with a mutex and a seq initialized for
 * each queue.
 *
 * The caller must hold 'dp->queue_rwlock' for writing. */
static void
dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers)
    OVS_REQ_WRLOCK(dp->queue_rwlock)
{
    size_t idx;

    if (dp->n_handlers == n_handlers) {
        return;
    }

    dp_netdev_destroy_all_queues(dp);

    dp->n_handlers = n_handlers;
    dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues);

    for (idx = 0; idx < n_handlers; idx++) {
        struct dp_netdev_queue *queue = &dp->handler_queues[idx];

        ovs_mutex_init(&queue->mutex);
        queue->seq = seq_create();
    }
}

/* Enables or disables upcall reception on 'dpif'.
 *
 * Reception counts as enabled whenever the datapath has a handler queue
 * array.  If the current state already equals 'enable', nothing happens.
 * Otherwise, under the queue write lock, enabling creates a single handler
 * queue and disabling destroys all of them.  Always returns 0. */
static int
dpif_netdev_recv_set(struct dpif *dpif, bool enable)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    bool enabled_now = dp->handler_queues != NULL;

    if (enabled_now == enable) {
        return 0;
    }

    fat_rwlock_wrlock(&dp->queue_rwlock);
    if (enable) {
        dp_netdev_refresh_queues(dp, 1);
    } else {
        dp_netdev_destroy_all_queues(dp);
    }
    fat_rwlock_unlock(&dp->queue_rwlock);

    return 0;
}

/* Resizes the handler queue array of 'dpif' to 'n_handlers' queues, but only
 * if a queue array currently exists; otherwise the request is ignored.  The
 * work is done under the queue write lock.  Always returns 0. */
static int
dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers)
{
    struct dp_netdev *datapath = get_dp_netdev(dpif);

    fat_rwlock_wrlock(&datapath->queue_rwlock);
    if (datapath->handler_queues) {
        dp_netdev_refresh_queues(datapath, n_handlers);
    }
    fat_rwlock_unlock(&datapath->queue_rwlock);

    return 0;
}

/* Stores 'queue_id' unchanged in '*priority': this datapath uses the queue ID
 * itself as the priority value.  Always returns 0. */
static int
dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                              uint32_t queue_id, uint32_t *priority)
{
    /* Identity mapping. */
    *priority = queue_id;

    return 0;
}

/* Returns true if 'dp' has handler queues and 'handler_id' indexes one of
 * them.  Otherwise logs a rate-limited warning naming the problem and returns
 * false.
 *
 * The caller must hold 'dp->queue_rwlock' at least for reading. */
static bool
dp_netdev_recv_check(const struct dp_netdev *dp, const uint32_t handler_id)
    OVS_REQ_RDLOCK(dp->queue_rwlock)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    if (dp->handler_queues) {
        if (handler_id < dp->n_handlers) {
            return true;
        }
        VLOG_WARN_RL(&rl, "handler index out of bound");
    } else {
        VLOG_WARN_RL(&rl, "receiving upcall disabled");
    }

    return false;
}

/* Dequeues one upcall from handler queue 'handler_id' of 'dpif'.
 *
 * On success returns 0 after copying the queued upcall into '*upcall',
 * uninitializing 'buf', and overwriting '*buf' with the queued buffer.
 * Returns EAGAIN if 'handler_id' fails dp_netdev_recv_check() or if the queue
 * is empty.  Runs under the queue read lock and the queue's own mutex. */
static int
dpif_netdev_recv(struct dpif *dpif, uint32_t handler_id,
                 struct dpif_upcall *upcall, struct ofpbuf *buf)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    int error = EAGAIN;

    fat_rwlock_rdlock(&dp->queue_rwlock);

    if (dp_netdev_recv_check(dp, handler_id)) {
        struct dp_netdev_queue *queue = &dp->handler_queues[handler_id];

        ovs_mutex_lock(&queue->mutex);
        if (queue->head != queue->tail) {
            struct dp_netdev_upcall *slot;

            slot = &queue->upcalls[queue->tail++ & QUEUE_MASK];
            *upcall = slot->upcall;

            /* Release the caller's old buffer before replacing it. */
            ofpbuf_uninit(buf);
            *buf = slot->buf;

            error = 0;
        }
        ovs_mutex_unlock(&queue->mutex);
    }

    fat_rwlock_unlock(&dp->queue_rwlock);

    return error;
}

/* Arranges a wakeup for handler queue 'handler_id' of 'dpif': an immediate
 * wake if the queue already holds an upcall, otherwise a wait on the queue's
 * seq.  Does nothing if 'handler_id' fails dp_netdev_recv_check(). */
static void
dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    fat_rwlock_rdlock(&dp->queue_rwlock);

    if (dp_netdev_recv_check(dp, handler_id)) {
        struct dp_netdev_queue *queue = &dp->handler_queues[handler_id];
        uint64_t seq;

        ovs_mutex_lock(&queue->mutex);
        /* Sample the seq before inspecting the queue. */
        seq = seq_read(queue->seq);
        if (queue->head == queue->tail) {
            seq_wait(queue->seq, seq);
        } else {
            poll_immediate_wake();
        }
        ovs_mutex_unlock(&queue->mutex);
    }

    fat_rwlock_unlock(&dp->queue_rwlock);
}

/* Purges the handler queues of the datapath behind 'dpif' while holding the
 * queue rwlock for writing. */
static void
dpif_netdev_recv_purge(struct dpif *dpif)
{
    struct dpif_netdev *handle = dpif_netdev_cast(dpif);

    fat_rwlock_wrlock(&handle->dp->queue_rwlock);
    dp_netdev_purge_queues(handle->dp);
    fat_rwlock_unlock(&handle->dp->queue_rwlock);
}

/* Creates and returns a new 'struct dp_netdev_actions' whose actions are a
 * private copy of the 'size' bytes of 'actions'.  The result is released with
 * dp_netdev_actions_free(). */
struct dp_netdev_actions *
dp_netdev_actions_create(const struct nlattr *actions, size_t size)
{
    struct dp_netdev_actions *copy = xmalloc(sizeof *copy);

    copy->size = size;
    copy->actions = xmemdup(actions, size);

    return copy;
}

/* Returns the actions currently attached to 'flow', read through
 * ovsrcu_get(). */
struct dp_netdev_actions *
dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
{
    struct dp_netdev_actions *current;

    current = ovsrcu_get(struct dp_netdev_actions *, &flow->actions);

    return current;
}

/* Frees 'actions' along with the action data it owns. */
static void
dp_netdev_actions_free(struct dp_netdev_actions *actions)
{
    /* Release the payload first, then the wrapper that points to it. */
    free(actions->actions);
    free(actions);
}


/* Receives one batch of packets from 'rxq', which belongs to 'port', and
 * feeds each packet to dp_netdev_port_input() with metadata initialized from
 * the port's number.
 *
 * Receive errors other than EAGAIN and EOPNOTSUPP are logged, rate-limited;
 * those two are ignored silently. */
inline static void
dp_netdev_process_rxq_port(struct dp_netdev *dp,
                          struct dp_netdev_port *port,
                          struct netdev_rxq *rxq)
{
    struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
    int error, c;

    error = netdev_rxq_recv(rxq, packet, &c);
    if (error) {
        if (error != EAGAIN && error != EOPNOTSUPP) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 5);

            VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
                        netdev_get_name(port->netdev),
                        ovs_strerror(error));
        }
    } else {
        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
        int idx = 0;

        while (idx < c) {
            dp_netdev_port_input(dp, packet[idx++], &md);
        }
    }
}

/* Processes one receive batch from every rx queue of every port that is not
 * a PMD port, while holding the port rwlock for reading. */
static void
dpif_netdev_run(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;

    ovs_rwlock_rdlock(&dp->port_rwlock);

    HMAP_FOR_EACH (port, node, &dp->ports) {
        int q;

        /* PMD ports are skipped here. */
        if (netdev_is_pmd(port->netdev)) {
            continue;
        }

        for (q = 0; q < netdev_n_rxq(port->netdev); q++) {
            dp_netdev_process_rxq_port(dp, port, port->rxq[q]);
        }
    }

    ovs_rwlock_unlock(&dp->port_rwlock);
}

/* Calls netdev_rxq_wait() on every rx queue of every port that is not a PMD
 * port, while holding the port rwlock for reading. */
static void
dpif_netdev_wait(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct dp_netdev_port *port;

    ovs_rwlock_rdlock(&dp->port_rwlock);

    HMAP_FOR_EACH (port, node, &dp->ports) {
        int q;

        /* PMD ports are skipped here. */
        if (netdev_is_pmd(port->netdev)) {
            continue;
        }

        for (q = 0; q < netdev_n_rxq(port->netdev); q++) {
            netdev_rxq_wait(port->rxq[q]);
        }
    }

    ovs_rwlock_unlock(&dp->port_rwlock);
}

/* One entry in a PMD thread's poll list, built by pmd_load_queues(). */
struct rxq_poll {
    struct dp_netdev_port *port; /* Port; a reference is taken per entry. */
    struct netdev_rxq *rx;       /* One of 'port''s rx queues. */
};

/* Rebuilds the poll list of PMD thread 'f'.
 *
 * Drops the port references held by the first 'poll_cnt' entries of
 * '*ppoll_list', then walks the rx queues of every PMD port in hash-map order.
 * Queues are numbered consecutively across ports, and this thread takes each
 * queue whose number modulo 'dp->n_pmd_threads' equals 'f->id'.  A reference
 * is taken on the port for each entry added.
 *
 * Stores the (possibly reallocated) list back in '*ppoll_list' and returns the
 * new number of entries.  Runs under the port rwlock held for reading. */
static int
pmd_load_queues(struct pmd_thread *f,
                struct rxq_poll **ppoll_list, int poll_cnt)
{
    struct dp_netdev *dp = f->dp;
    struct rxq_poll *poll_list = *ppoll_list;
    struct dp_netdev_port *port;
    int self = f->id;
    int rxq_index = 0;
    int n_polled = 0;
    int i;

    /* Simple scheduler for netdev rx polling. */
    ovs_rwlock_rdlock(&dp->port_rwlock);
    for (i = 0; i < poll_cnt; i++) {
        port_unref(poll_list[i].port);
    }

    HMAP_FOR_EACH (port, node, &dp->ports) {
        int q;

        if (!netdev_is_pmd(port->netdev)) {
            continue;
        }

        for (q = 0; q < netdev_n_rxq(port->netdev); q++, rxq_index++) {
            struct rxq_poll *slot;

            if ((rxq_index % dp->n_pmd_threads) != self) {
                continue;
            }

            poll_list = xrealloc(poll_list,
                                 sizeof *poll_list * (n_polled + 1));

            port_ref(port);
            slot = &poll_list[n_polled++];
            slot->port = port;
            slot->rx = port->rxq[q];
        }
    }

    ovs_rwlock_unlock(&dp->port_rwlock);
    *ppoll_list = poll_list;
    return n_polled;
}

/* Thread body for a PMD thread; 'f_' is the thread's 'struct pmd_thread'.
 *
 * Names the thread, sets its CPU affinity from 'f->id', and then busy-polls
 * the rx queues assigned by pmd_load_queues().  Once more than 1024 polling
 * passes have accumulated, the thread quiesces for RCU and compares
 * 'f->change_seq' with the value sampled at the last reload.  When it has
 * changed, the thread reloads its queues, or exits if the datapath's exit
 * latch is set.  On exit it drops its port references and frees the poll
 * list and its name.  Always returns NULL. */
static void *
pmd_thread_main(void *f_)
{
    struct pmd_thread *f = f_;
    struct dp_netdev *dp = f->dp;
    unsigned int lc = 0;
    struct rxq_poll *poll_list;
    unsigned int port_seq;
    int poll_cnt;
    int i;

    f->name = xasprintf("pmd_%u", ovsthread_id_self());
    set_subprogram_name("%s", f->name);
    poll_cnt = 0;
    poll_list = NULL;

    pmd_thread_setaffinity_cpu(f->id);
reload:
    /* (Re)build the poll list, then record the sequence number it matches. */
    poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);
    atomic_read(&f->change_seq, &port_seq);

    for (;;) {
        unsigned int c_port_seq;
        int i;

        for (i = 0; i < poll_cnt; i++) {
            dp_netdev_process_rxq_port(dp,  poll_list[i].port, poll_list[i].rx);
        }

        /* Only periodically pay for quiescing and the atomic read. */
        if (lc++ > 1024) {
            ovsrcu_quiesce();

            /* TODO: need completely userspace based signaling method.
             * to keep this thread entirely in userspace.
             * For now using atomic counter. */
            lc = 0;
            atomic_read_explicit(&f->change_seq, &c_port_seq, memory_order_consume);
            if (c_port_seq != port_seq) {
                break;
            }
        }
    }

    /* A sequence change without the exit latch means "reload queues". */
    if (!latch_is_set(&f->dp->exit_latch)){
        goto reload;
    }

    for (i = 0; i < poll_cnt; i++) {
         port_unref(poll_list[i].port);
    }

    free(poll_list);
    free(f->name);
    return NULL;
}

/* Makes 'dp' run exactly 'n' PMD threads.  Does nothing if it already does.
 *
 * Otherwise the existing threads are stopped (exit latch set, reload
 * requested, each thread joined, latch cleared again) and their array is
 * freed; then 'n' new threads are created, each with its index as its ID and
 * its 'change_seq' initialized to 1. */
static void
dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
{
    struct pmd_thread *f;
    int idx;

    if (dp->n_pmd_threads == n) {
        return;
    }

    /* Stop existing threads. */
    latch_set(&dp->exit_latch);
    dp_netdev_reload_pmd_threads(dp);
    for (idx = 0; idx < dp->n_pmd_threads; idx++) {
        xpthread_join(dp->pmd_threads[idx].thread, NULL);
    }
    latch_poll(&dp->exit_latch);
    free(dp->pmd_threads);

    /* Start new threads. */
    dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
    dp->n_pmd_threads = n;

    for (idx = 0; idx < n; idx++) {
        f = &dp->pmd_threads[idx];

        f->dp = dp;
        f->id = idx;
        atomic_store(&f->change_seq, 1);

        /* Each thread will distribute all devices rx-queues among
         * themselves. */
        xpthread_create(&f->thread, NULL, pmd_thread_main, f);
    }
}


/* Allocates a zeroed per-flow statistics bucket with xzalloc_cacheline(),
 * initializes its mutex, and returns it.  Used as the bucket-creation
 * callback for ovsthread_stats_bucket_get(). */
static void *
dp_netdev_flow_stats_new_cb(void)
{
    struct dp_netdev_flow_stats *fresh;

    fresh = xzalloc_cacheline(sizeof *fresh);
    ovs_mutex_init(&fresh->mutex);

    return fresh;
}

/* Credits 'packet' to the statistics of 'netdev_flow': one more packet,
 * 'packet->size' more bytes, the TCP flags from 'key' OR'd in, and the
 * last-used time raised to the current time if that is later.  Updates the
 * bucket obtained from ovsthread_stats_bucket_get(), under that bucket's
 * mutex. */
static void
dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
                    const struct ofpbuf *packet,
                    const struct flow *key)
{
    uint16_t tcp_flags = ntohs(key->tcp_flags);
    long long int now = time_msec();
    struct dp_netdev_flow_stats *mine;

    mine = ovsthread_stats_bucket_get(&netdev_flow->stats,
                                      dp_netdev_flow_stats_new_cb);

    ovs_mutex_lock(&mine->mutex);
    if (now > mine->used) {
        mine->used = now;
    }
    mine->tcp_flags |= tcp_flags;
    mine->byte_count += packet->size;
    mine->packet_count++;
    ovs_mutex_unlock(&mine->mutex);
}

/* Allocates a zeroed datapath statistics bucket with xzalloc_cacheline(),
 * initializes its mutex, and returns it.  Used as the bucket-creation
 * callback for ovsthread_stats_bucket_get(). */
static void *
dp_netdev_stats_new_cb(void)
{
    struct dp_netdev_stats *fresh;

    fresh = xzalloc_cacheline(sizeof *fresh);
    ovs_mutex_init(&fresh->mutex);

    return fresh;
}

/* Increments the datapath counter selected by 'type' in the statistics bucket
 * of 'dp' obtained from ovsthread_stats_bucket_get(), under that bucket's
 * mutex. */
static void
dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
{
    struct dp_netdev_stats *mine;

    mine = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);

    ovs_mutex_lock(&mine->mutex);
    mine->n[type]++;
    ovs_mutex_unlock(&mine->mutex);
}

/* Processes 'packet', received with metadata 'md', through datapath 'dp'.
 *
 * This function disposes of 'packet' on every path:
 *
 *   - A packet shorter than ETH_HEADER_LEN is deleted.
 *
 *   - If a flow matches, the flow's statistics are updated and the packet is
 *     handed to dp_netdev_execute_actions() with 'may_steal' set to true.
 *
 *   - If no flow matches, the packet is sent as a DPIF_UC_MISS upcall when
 *     handler queues are configured, and is then deleted in either case.
 *     Previously the packet was leaked when no handler queues existed.
 *
 * NOTE(review): 'dp->handler_queues' and 'dp->n_handlers' are read here
 * without holding 'dp->queue_rwlock', as before; 'n_handlers' is read once so
 * the nonzero check and the modulo below use the same value. */
static void
dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
                     struct pkt_metadata *md)
{
    struct dp_netdev_flow *netdev_flow;
    struct flow key;

    if (packet->size < ETH_HEADER_LEN) {
        ofpbuf_delete(packet);
        return;
    }
    flow_extract(packet, md, &key);
    netdev_flow = dp_netdev_lookup_flow(dp, &key);
    if (netdev_flow) {
        struct dp_netdev_actions *actions;

        dp_netdev_flow_used(netdev_flow, packet, &key);

        actions = dp_netdev_flow_get_actions(netdev_flow);
        dp_netdev_execute_actions(dp, &key, packet, true, md,
                                  actions->actions, actions->size);
        dp_netdev_count_packet(dp, DP_STAT_HIT);
    } else {
        uint32_t n_handlers = dp->n_handlers;

        /* Guard against a zero handler count: the modulo below would
         * otherwise divide by zero. */
        if (dp->handler_queues && n_handlers) {
            dp_netdev_count_packet(dp, DP_STAT_MISS);
            dp_netdev_output_userspace(dp, packet,
                                       flow_hash_5tuple(&key, 0) % n_handlers,
                                       DPIF_UC_MISS, &key, NULL);
        }

        /* The upcall path copies the packet, so the original is always ours
         * to free, whether or not an upcall was queued. */
        ofpbuf_delete(packet);
    }
}

/* Queues an upcall of the given 'type' for 'packet' on handler queue
 * 'queue_no' of 'dp'.
 *
 * The upcall gets its own buffer holding the ODP key generated from 'flow',
 * a copy of 'userdata' if it is nonnull, and a copy of the packet data.
 * 'packet' itself is not consumed.
 *
 * Returns 0 on success.  Returns ENODEV if 'dp' has no handler queues or
 * 'queue_no' is out of range, and ENOBUFS if the queue is full; in both of
 * those cases the upcall is dropped and DP_STAT_LOST is incremented. */
static int
dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
                           int queue_no, int type, const struct flow *flow,
                           const struct nlattr *userdata)
{
    struct dp_netdev_queue *q;
    int error;

    fat_rwlock_rdlock(&dp->queue_rwlock);

    /* Callers pick 'queue_no' before this lock is taken, so the queues may
     * have been destroyed or resized in the meantime.  Validate under the
     * lock instead of indexing a NULL or too-short array. */
    if (!dp->handler_queues || queue_no < 0
        || (uint32_t) queue_no >= dp->n_handlers) {
        dp_netdev_count_packet(dp, DP_STAT_LOST);
        fat_rwlock_unlock(&dp->queue_rwlock);
        return ENODEV;
    }

    q = &dp->handler_queues[queue_no];
    ovs_mutex_lock(&q->mutex);
    if (q->head - q->tail < MAX_QUEUE_LEN) {
        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
        struct dpif_upcall *upcall = &u->upcall;
        struct ofpbuf *buf = &u->buf;
        size_t buf_size;

        upcall->type = type;

        /* Allocate buffer big enough for everything, so that none of the
         * pointers stored in 'upcall' below is invalidated by a later
         * reallocation. */
        buf_size = ODPUTIL_FLOW_KEY_BYTES;
        if (userdata) {
            buf_size += NLA_ALIGN(userdata->nla_len);
        }
        buf_size += packet->size;
        ofpbuf_init(buf, buf_size);

        /* Put ODP flow. */
        odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
        upcall->key = buf->data;
        upcall->key_len = buf->size;

        /* Put userdata.  The ring slot is reused, so the pointer must be
         * cleared explicitly when there is no userdata; otherwise it would
         * still point into the buffer of an earlier upcall. */
        if (userdata) {
            upcall->userdata = ofpbuf_put(buf, userdata,
                                          NLA_ALIGN(userdata->nla_len));
        } else {
            upcall->userdata = NULL;
        }

        upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size);
        upcall->packet.size = packet->size;

        seq_change(q->seq);

        error = 0;
    } else {
        dp_netdev_count_packet(dp, DP_STAT_LOST);
        error = ENOBUFS;
    }
    ovs_mutex_unlock(&q->mutex);
    fat_rwlock_unlock(&dp->queue_rwlock);

    return error;
}

/* Context that dp_netdev_execute_actions() passes to dp_execute_cb() through
 * the callback's 'aux_' pointer.  It is initialized positionally there, so
 * the member order matters. */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;   /* Datapath the actions execute in. */
    const struct flow *key; /* Flow key of the packet being processed. */
};

/* Callback given to odp_execute_actions() for the actions that need the
 * datapath; 'aux_' is a 'struct dp_netdev_execute_aux'.
 *
 *   - OVS_ACTION_ATTR_OUTPUT: sends 'packet' on the port named by the action,
 *     if that port exists, passing 'may_steal' on to netdev_send().
 *
 *   - OVS_ACTION_ATTR_USERSPACE: queues a DPIF_UC_ACTION upcall on the handler
 *     queue chosen by the flow's 5-tuple hash, then deletes 'packet' if
 *     'may_steal' is true.  If the datapath has no handlers the upcall is
 *     skipped, because the modulo would otherwise divide by zero.
 *
 * Any other action type is not expected to reach this callback. */
static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
              const struct pkt_metadata *md OVS_UNUSED,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
        if (p) {
            netdev_send(p->netdev, packet, may_steal);
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE: {
        const struct nlattr *userdata;
        /* Read once so the check and the modulo see the same value. */
        uint32_t n_handlers = aux->dp->n_handlers;

        userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);

        if (n_handlers) {
            dp_netdev_output_userspace(aux->dp, packet,
                                       flow_hash_5tuple(aux->key, 0)
                                           % n_handlers,
                                       DPIF_UC_ACTION, aux->key,
                                       userdata);
        }

        /* The upcall holds its own copy, so honor 'may_steal' regardless of
         * whether the upcall was queued. */
        if (may_steal) {
            ofpbuf_delete(packet);
        }
        break;
    }
    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }

}

/* Runs the 'actions_len' bytes of 'actions' on 'packet' by calling
 * odp_execute_actions() with dp_execute_cb() as the callback.  'dp' and 'key'
 * reach the callback through its auxiliary pointer; 'may_steal' and 'md' are
 * passed straight through. */
static void
dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key,
                          struct ofpbuf *packet, bool may_steal,
                          struct pkt_metadata *md,
                          const struct nlattr *actions, size_t actions_len)
{
    struct dp_netdev_execute_aux aux;

    aux.dp = dp;
    aux.key = key;

    odp_execute_actions(&aux, packet, may_steal, md,
                        actions, actions_len, dp_execute_cb);
}

/* Provider function table for the "netdev" datapath type.
 *
 * The initializer is positional, so each entry must stay in the order in
 * which 'struct dpif_class' (declared outside this file) lists its members.
 * NULL marks a callback this provider does not supply. */
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_get,
    dpif_netdev_flow_put,
    dpif_netdev_flow_del,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_state_init,
    dpif_netdev_flow_dump_start,
    dpif_netdev_flow_dump_next,
    /* NOTE(review): unnamed slot between flow_dump_next and flow_dump_done;
     * confirm its member name against the 'struct dpif_class' declaration. */
    NULL,
    dpif_netdev_flow_dump_done,
    dpif_netdev_flow_dump_state_uninit,
    dpif_netdev_execute,
    NULL,                       /* operate */
    dpif_netdev_recv_set,
    dpif_netdev_handlers_set,
    dpif_netdev_queue_to_priority,
    dpif_netdev_recv,
    dpif_netdev_recv_wait,
    dpif_netdev_recv_purge,
};

/* unixctl handler for "dpif-dummy/change-port-number DP PORT NEW-NUMBER".
 *
 * Looks up dummy datapath 'argv[1]' and its port named 'argv[2]', then moves
 * that port to number 'argv[3]' and bumps the datapath's port sequence.
 * Replies with an error if the datapath is unknown or not a dummy, the port
 * is unknown, the number is 0 or ODPP_NONE, or the number is already in
 * use. */
static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    const char *problem = NULL;
    struct dp_netdev_port *port;
    struct dp_netdev *dp;
    odp_port_t new_no;

    /* Find the datapath and pin it with a reference before dropping the
     * global mutex. */
    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (dp && dpif_netdev_class_is_dummy(dp->class)) {
        ovs_refcount_ref(&dp->ref_cnt);
    } else {
        dp = NULL;
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    if (!dp) {
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }

    ovs_rwlock_wrlock(&dp->port_rwlock);
    if (get_port_by_name(dp, argv[2], &port)) {
        problem = "unknown port";
    } else {
        new_no = u32_to_odp(atoi(argv[3]));
        if (!new_no || new_no == ODPP_NONE) {
            problem = "bad port number";
        } else if (dp_netdev_lookup_port(dp, new_no)) {
            problem = "port number already in use";
        }
    }

    if (problem) {
        unixctl_command_reply_error(conn, problem);
    } else {
        /* Re-key the port in the hash map under its new number. */
        hmap_remove(&dp->ports, &port->node);
        port->port_no = new_no;
        hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(new_no), 0));
        seq_change(dp->port_seq);
        unixctl_command_reply(conn, NULL);
    }

    ovs_rwlock_unlock(&dp->port_rwlock);
    dp_netdev_unref(dp);
}

/* Registers a datapath provider named 'type' that is a copy of
 * 'dpif_netdev_class'.  The copied class and its duplicated type string are
 * handed to dp_register_provider() and are not freed here. */
static void
dpif_dummy_register__(const char *type)
{
    struct dpif_class *dummy_class = xmalloc(sizeof *dummy_class);

    /* Start from the netdev implementation, then override only the name. */
    *dummy_class = dpif_netdev_class;
    dummy_class->type = xstrdup(type);

    dp_register_provider(dummy_class);
}

void
dpif_dummy_register(bool override)
{
    if (override) {
        struct sset types;
        const char *type;

        sset_init(&types);
        dp_enumerate_types(&types);
        SSET_FOR_EACH (type, &types) {
            if (!dp_unregister_provider(type)) {
                dpif_dummy_register__(type);
            }
        }
        sset_destroy(&types);
    }

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "DP PORT NEW-NUMBER",
                             3, 3, dpif_dummy_change_port_number, NULL);
}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/*
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								#include <config.h>
 								#include "dpif.h"
 								#include <ctype.h>
 								#include <errno.h>
 								#include <fcntl.h>
 								#include <inttypes.h>
 								#include <netinet/in.h>
-												Always #include <sys/socket.h> before <net/if.h>.

FreeBSD 8.0's <net/if.h> requires <sys/socket.h> to be included first,
even though I don't see any such requirement in POSIX.

											
										
										
											2010-05-26 10:05:19 -07:00
+								#include <sys/socket.h>
-												Work around bugs in system headers.

On some systems, at least, one must include <sys/types.h> before
<netinet/in.h>, and <netinet/in.h> before <arpa/inet.h> or <net/if.h>.

From Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-12 12:51:36 -08:00
+								#include <net/if.h>
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								#include <stdint.h>
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include <stdlib.h>
 								#include <string.h>
 								#include <sys/ioctl.h>
 								#include <sys/stat.h>
 								#include <unistd.h>
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								#include "classifier.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "csum.h"
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								#include "dpif.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "dpif-provider.h"
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								#include "dummy.h"
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								#include "dynamic-string.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "flow.h"
 								#include "hmap.h"
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								#include "latch.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "list.h"
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								#include "meta-flow.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "netdev.h"
-												netdev-dpdk: Use multiple core for dpdk IO.

DPDK needs _lcore_id to be set in order to use multiple cores.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 22:07:44 -07:00
+								#include "netdev-dpdk.h"
-												netdev: New function netdev_get_dpif_port().

In future patches, a netdev's datapath port name may not
necessarily be the same as its device name. This patch prepares for
this by making the distinction in the netdev and dpif layers.

Signed-off-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-12-16 17:08:50 -08:00
+								#include "netdev-vport.h"
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								#include "netlink.h"
-												odp-execute: New module for executing datapath actions.

This moves generic action execution code out of lib/dpif-netdev.c
and into a new file, lib/odp-execute.c.

This is in preparation for using odp_execute_actions()
in lib/odp-util.c to handle recirculation.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-29 15:06:38 +09:00
+								#include "odp-execute.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "odp-util.h"
 								#include "ofp-print.h"
 								#include "ofpbuf.h"
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								#include "ovs-rcu.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "packets.h"
 								#include "poll-loop.h"
-												dpif-netdev: Implement OVS_ACTION_ATTR_SAMPLE action.

OVS_ACTION_ATTR_SAMPLE has never been implemented in dpif-netdev.  This
commit implements it and adds a cast to enum ovs_action_type in the switch
statement that checks the action type, so that GCC complains if we forget
to add a case for a new action type.

I had to assign the return value of nl_attr_type() to a temporary variable,
because "switch ((enum ovs_action_type) nl_attr_type(a))" provoked a GCC
warning that I've never seen before:

../lib/dpif-netdev.c:1260: warning: cast from function call of type 'int'
     to non-matching type 'enum ovs_action_type'

											
										
										
											2011-10-11 11:07:14 -07:00
+								#include "random.h"
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								#include "seq.h"
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								#include "shash.h"
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								#include "sset.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "timeval.h"
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								#include "unixctl.h"
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								#include "util.h"
 								#include "vlog.h"
-												vlog: Introduce VLOG_DEFINE_THIS_MODULE for declaring vlog module in use.

Adding a macro to define the vlog module in use adds a level of
indirection, which makes it easier to change how the vlog module must be
defined.  A followup commit needs to do that, so getting these widespread
changes out of the way first should make that commit easier to review.

											
										
										
											2010-07-16 11:02:49 -07:00
-												vlog: Make client supply semicolon for VLOG_DEFINE_THIS_MODULE.

It's kind of odd for VLOG_DEFINE_THIS_MODULE to supply its own semicolon,
so this commit switches to the more common form.

											
										
										
											2010-10-19 14:47:01 -07:00
+								VLOG_DEFINE_THIS_MODULE(dpif_netdev);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								/* By default, choose a priority in the middle. */
 								#define NETDEV_RULE_PRIORITY 0x8000
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal when a packet is received
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								#define NR_THREADS 1
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* Configuration parameters. */
 								enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								/* Queues. */
 								enum { MAX_QUEUE_LEN = 128 };   /* Maximum number of packets per queue. */
 								enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
 								BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Protects against changes to 'dp_netdevs'. */
 								static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
 								/* Contains all 'struct dp_netdev's. */
 								static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
 								    = SHASH_INITIALIZER(&dp_netdevs);
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
+								struct dp_netdev_upcall {
 								    struct dpif_upcall upcall;  /* Queued upcall information. */
 								    struct ofpbuf buf;          /* ofpbuf instance for upcall.packet. */
 								};
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								/* A queue passing packets from a struct dp_netdev to its clients (handlers).
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								 * Any access at all requires the owning 'dp_netdev''s queue_rwlock and
 								 * its own mutex. */
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								struct dp_netdev_queue {
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    struct ovs_mutex mutex;
 								    struct seq *seq;      /* Incremented whenever a packet is queued. */
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								    struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
 								    unsigned int head OVS_GUARDED;
 								    unsigned int tail OVS_GUARDED;
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Datapath based on the network device interface from netdev.h.
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Some members, marked 'const', are immutable.  Accessing other members
 								 * requires synchronization, as noted in more detail below.
 								 *
 								 * Acquisition order is, from outermost to innermost:
 								 *
 								 *    dp_netdev_mutex (global)
 								 *    port_rwlock
 								 *    flow_mutex
 								 *    cls.rwlock
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								 *    queue_rwlock
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								 */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								struct dp_netdev {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    const struct dpif_class *const class;
 								    const char *const name;
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    struct ovs_refcount ref_cnt;
 								    atomic_flag destroyed;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Flows.
 								     *
 								     * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
 								     *
 								     * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
 								     * the 'cls->rwlock' write lock.  (The outer 'flow_mutex' allows writers to
 								     * atomically perform multiple operations on 'cls' and 'flow_table'.)
 								     */
 								    struct ovs_mutex flow_mutex;
 								    struct classifier cls;      /* Classifier.  Protected by cls.rwlock. */
 								    struct hmap flow_table OVS_GUARDED; /* Flow table. */
 								    /* Queues.
 								     *
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								     * 'queue_rwlock' protects the modification of 'handler_queues' and
 								     * 'n_handlers'.  The elements of each queue are protected by the mutex
 								     * in that queue's 'handler_queues' entry. */
 								    struct fat_rwlock queue_rwlock;
 								    struct dp_netdev_queue *handler_queues;
 								    uint32_t n_handlers;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Statistics.
 								     *
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								     * ovsthread_stats is internally synchronized. */
 								    struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Ports.
 								     *
 								     * Any lookup into 'ports' or any access to the dp_netdev_ports found
 								     * through 'ports' requires taking 'port_rwlock'. */
 								    struct ovs_rwlock port_rwlock;
 								    struct hmap ports OVS_GUARDED;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    struct seq *port_seq;       /* Incremented whenever a port changes. */
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
 								    /* Forwarding threads. */
 								    struct latch exit_latch;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    struct pmd_thread *pmd_threads;
 								    size_t n_pmd_threads;
 								    int pmd_count;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
 								                                                    odp_port_t)
 								    OVS_REQ_RDLOCK(dp->port_rwlock);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								enum dp_stat_type {
 								    /* Counter indices for 'n' in struct dp_netdev_stats. */
 								    DP_STAT_HIT,                /* Packets that matched in the flow table. */
 								    DP_STAT_MISS,               /* Packets that did not match. */
 								    DP_STAT_LOST,               /* Packets not passed up to the client. */
 								    DP_N_STATS                  /* Number of counters above; sizes 'n'. */
 								};
 								/* Contained by struct dp_netdev's 'stats' member.  */
 								struct dp_netdev_stats {
 								    struct ovs_mutex mutex;          /* Protects 'n'. */
 								    /* Indexed by DP_STAT_*, protected by 'mutex'. */
 								    unsigned long long int n[DP_N_STATS] OVS_GUARDED;
 								};
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* A port in a netdev-based datapath. */
 								struct dp_netdev_port {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    struct hmap_node node;      /* Node in dp_netdev's 'ports'. */
 								    odp_port_t port_no;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct netdev *netdev;
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    struct netdev_saved_flags *sf;
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    struct netdev_rxq **rxq;
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								    struct ovs_refcount ref_cnt;
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    char *type;                 /* Port type as requested by user. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* A flow in dp_netdev's 'flow_table'.
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Except near the beginning or ending of its lifespan, rule 'cr' belongs to
 								 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 								 *
 								 * Motivation
 								 * ----------
 								 *
 								 * The thread safety rules described here for "struct dp_netdev_flow" are
 								 * motivated by two goals:
 								 *
 								 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 								 *      reading bad data due to changes by some thread concurrently modifying
 								 *      those members.
 								 *
 								 *    - Prevent two threads making changes to members of a given "struct
 								 *      dp_netdev_flow" from interfering with each other.
 								 *
 								 *
 								 * Rules
 								 * -----
 								 *
 								 * A flow 'flow' may be accessed without a risk of being freed by code that
 								 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
 								 * 'flow->ref_cnt' (or both).  Code that needs to hold onto a flow for a while
 								 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
 								 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
 								 *
 								 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
 								 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
 								 * protect members of 'flow' from modification (that's 'flow->mutex').
 								 *
 								 * 'flow->mutex' protects the members of 'flow' from modification.  It doesn't
 								 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
 								 * doesn't prevent the flow from being freed (that's 'flow->ref_cnt').
 								 *
 								 * Some members, marked 'const', are immutable.  Accessing other members
 								 * requires synchronization, as noted in more detail below.
 								 */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								struct dp_netdev_flow {
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    /* Packet classification. */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Hash table index by unmasked flow. */
 								    const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
 								    const struct flow flow;      /* The flow that created this entry. */
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    /* Protects members marked OVS_GUARDED.
 								     *
 								     * Acquire after datapath's flow_mutex. */
 								    struct ovs_mutex mutex OVS_ACQ_AFTER(dp_netdev_mutex);
 								    /* Statistics.
 								     *
 								     * Reading or writing these members requires 'mutex'. */
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								    /* Actions.
 								     *
 								     * Reading 'actions' requires 'mutex'.
 								     * Writing 'actions' requires 'mutex' and (to allow for transactions) the
 								     * datapath's flow_mutex. */
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    OVSRCU_TYPE(struct dp_netdev_actions *) actions;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								static void dp_netdev_flow_free(struct dp_netdev_flow *);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								/* Contained by struct dp_netdev_flow's 'stats' member.  */
 								struct dp_netdev_flow_stats {
 								    struct ovs_mutex mutex;         /* Guards all the other members. */
 								    long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
 								    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
 								    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
 								    uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
 								};
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								/* A set of datapath actions within a "struct dp_netdev_flow".
 								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
 								 * freed by code that holds a read-lock or write-lock on 'flow->mutex' (where
 								 * 'flow' is the dp_netdev_flow for which 'flow->actions == actions') or that
 								 * owns a reference to 'actions->ref_cnt' (or both).
 								 *
 								 * NOTE(review): the struct as declared below has no 'ref_cnt' member, and
 								 * 'flow->actions' is declared with OVSRCU_TYPE, so the rules above look
 								 * stale.  Presumably the lifetime of 'actions' is governed by RCU instead;
 								 * confirm and update this comment. */
 								struct dp_netdev_actions {
 								    /* These members are immutable: they do not change during the struct's
 								     * lifetime.  */
 								    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
 								    unsigned int size;          /* Size of 'actions', in bytes. */
 								};
 								struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
 								                                                   size_t);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								struct dp_netdev_actions *dp_netdev_flow_get_actions(
 								    const struct dp_netdev_flow *);
 								static void dp_netdev_actions_free(struct dp_netdev_actions *);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								/* PMD: poll-mode driver.  A PMD accesses devices via polling to eliminate
 								 * the performance overhead of interrupt processing.  Therefore netdev
 								 * cannot implement rx-wait for these devices, and dpif-netdev needs to poll
 								 * these devices to check their receive buffers.  Each pmd-thread polls the
 								 * devices assigned to it.
 								 *
 								 * DPDK uses PMDs for accessing NICs.
 								 *
 								 * A pmd_thread is a thread that receives packets from PMD ports, looks them
 								 * up in the flow table, and executes the actions it finds.
 								 **/
 								struct pmd_thread {
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    struct dp_netdev *dp;
 								    pthread_t thread;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    int id;
 								    atomic_uint change_seq;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    char *name;
 								};
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								/* Interface to netdev-based datapath. */
 								struct dpif_netdev {
 								    struct dpif dpif;
 								    struct dp_netdev *dp;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    uint64_t last_port_seq;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
 								                              struct dp_netdev_port **portp)
 								    OVS_REQ_RDLOCK(dp->port_rwlock);
 								static int get_port_by_name(struct dp_netdev *dp, const char *devname,
 								                            struct dp_netdev_port **portp)
 								    OVS_REQ_RDLOCK(dp->port_rwlock);
 								static void dp_netdev_free(struct dp_netdev *)
 								    OVS_REQUIRES(dp_netdev_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void dp_netdev_flow_flush(struct dp_netdev *);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static int do_add_port(struct dp_netdev *dp, const char *devname,
 								                       const char *type, odp_port_t port_no)
 								    OVS_REQ_WRLOCK(dp->port_rwlock);
 								static int do_del_port(struct dp_netdev *dp, odp_port_t port_no)
 								    OVS_REQ_WRLOCK(dp->port_rwlock);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
 								    OVS_REQ_WRLOCK(dp->queue_rwlock);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								static int dpif_netdev_open(const struct dpif_class *, const char *name,
 								                            bool create, struct dpif **);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								                                      int queue_no, int type,
 								                                      const struct flow *,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices, and the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								                                      const struct nlattr *userdata);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								static void dp_netdev_execute_actions(struct dp_netdev *dp,
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets, but the current netdev API does
not allow that.  The following patch allows dpif-netdev to receive a batch
of packets in a single rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								                                      const struct flow *, struct ofpbuf *, bool may_steal,
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								                                      struct pkt_metadata *,
-												datapath: Refactor actions in terms of match fields.

Almost all current actions can be expressed in the form of
push/pop/set <field>, where field is one of the match fields. We can
create three base actions and take a field. This has both a nice
symmetry and avoids inconsistencies where we can match on the vlan
TPID but not set it.
Following patch converts all actions to this new format.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

Bug #7115

											
										
										
											2011-10-21 14:38:54 -07:00
+								                                      const struct nlattr *actions,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices, and the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								                                      size_t actions_len);
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif thread exclusively
for PMD devices, and the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								                                 struct pkt_metadata *);
 								static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								static struct dpif_netdev *
 								dpif_netdev_cast(const struct dpif *dpif)
 								{
-												Replace most uses of assert by ovs_assert.

This is a straight search-and-replace, except that I also removed #include
<assert.h> from each file where there were no assert calls left.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-11-06 13:14:55 -08:00
+								    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
 								}
 								static struct dp_netdev *
 								get_dp_netdev(const struct dpif *dpif)
 								{
 								    return dpif_netdev_cast(dpif)->dp;
 								}
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								static int
 								dpif_netdev_enumerate(struct sset *all_dps)
 								{
 								    struct shash_node *node;
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    SHASH_FOR_EACH(node, &dp_netdevs) {
 								        sset_add(all_dps, node->name);
 								    }
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&dp_netdev_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    return 0;
 								}
-												dpif-netdev: Allow stub interfaces on the dummy datapath.

Future patches will need to add netdevs to the dummy datapath which
can't actually send or receive packets.

Signed-off-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-01-08 14:37:23 -08:00
+								static bool
 								dpif_netdev_class_is_dummy(const struct dpif_class *class)
 								{
 								    /* Any class other than the real 'dpif_netdev_class' is treated as a
 								     * dummy class. */
 								    const bool is_real = (class == &dpif_netdev_class);
 								    return !is_real;
 								}
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								static const char *
 								dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
 								{
 								    return strcmp(type, "internal") ? type
-												dpif-netdev: Allow stub interfaces on the dummy datapath.

Future patches will need to add netdevs to the dummy datapath which
can't actually send or receive packets.

Signed-off-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-01-08 14:37:23 -08:00
+								                  : dpif_netdev_class_is_dummy(class) ? "dummy"
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								                  : "tap";
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static struct dpif *
 								create_dpif_netdev(struct dp_netdev *dp)
 								{
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    uint16_t netflow_id = hash_string(dp->name, 0);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dpif_netdev *dpif;
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    ovs_refcount_ref(&dp->ref_cnt);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    dpif = xmalloc(sizeof *dpif);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif->dp = dp;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    dpif->last_port_seq = seq_read(dp->port_seq);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    return &dpif->dpif;
 								}
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								/* Choose an unused, non-zero port number and return it on success.
 								 * Return ODPP_NONE on failure. */
 								static odp_port_t
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								choose_port(struct dp_netdev *dp, const char *name)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_RDLOCK(dp->port_rwlock)
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								{
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    uint32_t port_no;
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
 								    if (dp->class != &dpif_netdev_class) {
 								        const char *p;
 								        int start_no = 0;
 								        /* If the port name begins with "br", start the number search at
 								         * 100 to make writing tests easier. */
 								        if (!strncmp(name, "br", 2)) {
 								            start_no = 100;
 								        }
 								        /* If the port name contains a number, try to assign that port number.
 								         * This can make writing unit tests easier because port numbers are
 								         * predictable. */
 								        for (p = name; *p != '\0'; p++) {
 								            if (isdigit((unsigned char) *p)) {
 								                port_no = start_no + strtol(p, NULL, 10);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								                if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
 								                    && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                    return u32_to_odp(port_no);
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								                }
 								                break;
 								            }
 								        }
 								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
 								        if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								            return u32_to_odp(port_no);
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								        }
 								    }
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    return ODPP_NONE;
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
+								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								create_dp_netdev(const char *name, const struct dpif_class *class,
 								                 struct dp_netdev **dpp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp_netdev_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp;
 								    int error;
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    dp = xzalloc(sizeof *dp);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    shash_add(&dp_netdevs, name, dp);
 								    *CONST_CAST(const struct dpif_class **, &dp->class) = class;
 								    *CONST_CAST(const char **, &dp->name) = xstrdup(name);
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    ovs_refcount_init(&dp->ref_cnt);
-												dpif-netdev: init atomic flag dp->destroyed

It is better to explicitly initialize dp->destroyed than to rely
on xzalloc().

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-17 22:10:53 -07:00
+								    atomic_flag_clear(&dp->destroyed);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								    ovs_mutex_init(&dp->flow_mutex);
 								    classifier_init(&dp->cls, NULL);
 								    hmap_init(&dp->flow_table);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_init(&dp->queue_rwlock);
-												dpif-netdev: Use new "ovsthread_counter" to track dp statistics.

ovsthread_counter is an abstract interface that could be implemented
different ways.  The initial implementation is simple but less than
optimally efficient.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-23 14:04:13 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    ovsthread_stats_init(&dp->stats);
-												dpif-netdev: Use new "ovsthread_counter" to track dp statistics.

ovsthread_counter is an abstract interface that could be implemented
different ways.  The initial implementation is simple but less than
optimally efficient.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-23 14:04:13 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_init(&dp->port_rwlock);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    hmap_init(&dp->ports);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    dp->port_seq = seq_create();
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    latch_init(&dp->exit_latch);
-												tests: Rewrite unit tests to not expect bridge with odp zero.

A future commit will make all bridges of a particular type share a
single backing datapath.  That backing datapath will have a datapath
port number of zero and bridges will be assigned other numbers.  This
commit modifies the tests so that they don't expect port zero.

It adopts the convention that bridges of type "dummy" with a name of the
form "br<n>" will be assigned a port number of 100+<n>.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-13 17:45:00 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_wrlock(&dp->port_rwlock);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    error = do_add_port(dp, name, "internal", ODPP_LOCAL);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
 								        dp_netdev_free(dp);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								        return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    *dpp = dp;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								static int
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								dpif_netdev_open(const struct dpif_class *class, const char *name,
-												dpif: Make dpif_class 'open' function take class instead of type name.

This makes it easier for dpif_provider implementations to share code but
distinguish the class actually in use, because comparing a pointer is
easier than comparing a string.

											
										
										
											2010-11-18 10:06:41 -08:00
+								                 bool create, struct dpif **dpifp)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    struct dp_netdev *dp;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread-safety checking.  The
check can be run by using the -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
+								    dp = shash_find_data(&dp_netdevs, name);
 								    if (!dp) {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = (dp->class != class ? EINVAL
 								                 : create ? EEXIST
 								                 : 0);
 								    }
 								    if (!error) {
 								        *dpifp = create_dpif_netdev(dp);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread-safety checking.  The
check can be run by using the -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&dp_netdev_mutex);
-												dpif-netdev: Simplify code by using shash for names and dropping indexes.

											
										
										
											2010-11-24 12:35:22 -08:00
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								dp_netdev_purge_queues(struct dp_netdev *dp)
-												dpif-netdev: Implement the API functions to allow multiple handler
threads to read upcalls.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    OVS_REQ_WRLOCK(dp->queue_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    int i;
-												dpif-netdev: Implement the API functions to allow multiple handler
threads to read upcalls.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    for (i = 0; i < dp->n_handlers; i++) {
 								        struct dp_netdev_queue *q = &dp->handler_queues[i];
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads to read upcalls.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        ovs_mutex_lock(&q->mutex);
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								        while (q->tail != q->head) {
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
+								            struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4-byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								            ofpbuf_uninit(&u->upcall.packet);
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
+								            ofpbuf_uninit(&u->buf);
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								        }
-												dpif-netdev: Implement the API functions to allow multiple handler
threads to read upcalls.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        ovs_mutex_unlock(&q->mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								}
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 								 * through the 'dp_netdevs' shash while freeing 'dp'. */
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								static void
 								dp_netdev_free(struct dp_netdev *dp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp_netdev_mutex)
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								{
-												dpif-netdev: Avoid pointlessly maintaining a port count.

'n_ports' was only used for testing for nonzero, and we can rewrite the
code that does that to more straightforwardly use LIST_FOR_EACH_SAFE.

											
										
										
											2011-08-10 12:40:10 -07:00
+								    struct dp_netdev_port *port, *next;
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    struct dp_netdev_stats *bucket;
 								    int i;
-												dpif-netdev: Avoid pointlessly maintaining a port count.

'n_ports' was only used for testing for nonzero, and we can rewrite the
code that does that to more straightforwardly use LIST_FOR_EACH_SAFE.

											
										
										
											2011-08-10 12:40:10 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    shash_find_and_delete(&dp_netdevs, dp->name);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do I/O, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    dp_netdev_set_pmd_threads(dp, 0);
 								    free(dp->pmd_threads);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    dp_netdev_flow_flush(dp);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_wrlock(&dp->port_rwlock);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) {
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								        do_del_port(dp, port->port_no);
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
 								        ovs_mutex_destroy(&bucket->mutex);
 								        free_cacheline(bucket);
 								    }
 								    ovsthread_stats_destroy(&dp->stats);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads to read upcalls.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_wrlock(&dp->queue_rwlock);
 								    dp_netdev_destroy_all_queues(dp);
 								    fat_rwlock_unlock(&dp->queue_rwlock);
 								    fat_rwlock_destroy(&dp->queue_rwlock);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    classifier_destroy(&dp->cls);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    hmap_destroy(&dp->flow_table);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_destroy(&dp->flow_mutex);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_destroy(dp->port_seq);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    hmap_destroy(&dp->ports);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    latch_destroy(&dp->exit_latch);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    free(CONST_CAST(char *, dp->name));
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    free(dp);
 								}
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								/* Drops one reference to 'dp'.  Does nothing if 'dp' is null.  When
 								 * ovs_refcount_unref() reports that the count was 1 before the decrement,
 								 * 'dp' is handed to dp_netdev_free(). */
 								static void
 								dp_netdev_unref(struct dp_netdev *dp)
 								{
 								    if (!dp) {
 								        return;
 								    }
 								    /* dp_netdev_mutex is held across the decrement so that, should
 								     * dp->ref_cnt drop to zero, nobody can obtain a fresh reference to
 								     * 'dp' through the 'dp_netdevs' shash in the meantime. */
 								    ovs_mutex_lock(&dp_netdev_mutex);
 								    if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
 								        dp_netdev_free(dp);
 								    }
 								    ovs_mutex_unlock(&dp_netdev_mutex);
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
 								dpif_netdev_close(struct dpif *dpif)
 								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    dp_netdev_unref(dp);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    free(dpif);
 								}
 								static int
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								dpif_netdev_destroy(struct dpif *dpif)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Take advantage of ovs_refcount for dp_netdev.

By making "destroyed" own a reference, we can treat dp_netdev's ref_cnt
like any other in Open vSwitch.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 19:41:10 -08:00
+								    if (!atomic_flag_test_and_set(&dp->destroyed)) {
 								        if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
 								            /* Can't happen: 'dpif' still owns a reference to 'dp'. */
 								            OVS_NOT_REACHED();
 								        }
 								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								static int
-												dpif: Avoid use of  "struct ovs_dp_stats" in platform-independent modules.

Over time we wish to reduce the number of datapath-protocol.h definitions
used directly outside of Linux-specific code.  This commit removes use of
"struct ovs_dp_stats" from platform-independent code.

Bug #7559.

											
										
										
											2011-10-05 11:18:13 -07:00
+								dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    struct dp_netdev_stats *bucket;
 								    size_t i;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_rdlock(&dp->cls.rwlock);
-												ovs-dpctl: Show number of flows

Expose the number of flows present in a datapath to user-space
and to users via ovs-dpctl show.

e.g.:

ovs-dpctl show br3
system@br3:
	lookups: frags:0, hit:0, missed:0, lost:0
	flows: 0
	...

Signed-off-by: Simon Horman <horms@verge.net.au>
[Jesse: Add same logic to userspace datapath.]
Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-08-04 08:04:10 +09:00
+								    stats->n_flows = hmap_count(&dp->flow_table);
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								    stats->n_hit = stats->n_missed = stats->n_lost = 0;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        stats->n_hit += bucket->n[DP_STAT_HIT];
 								        stats->n_missed += bucket->n[DP_STAT_MISS];
 								        stats->n_lost += bucket->n[DP_STAT_LOST];
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												dpif-linux: fix the size of n_masks

The command ovs-dpctl can wrongly output the masks even if the
datapath does not implement mega flows. In this case the output
will be similar to the following:

system@ovs-system:
	lookups: hit:14 missed:41 lost:0
	flows: 0
	masks: hit:18446744073709551615 total:4294967295
		hit/pkt:335395346794719104.00
	port 0: ovs-system (internal)
	port 1: gre_system (gre: df_default=false, ttl=0)
	port 2: ots-br0 (internal)
	port 3: int0 (internal)
	port 4: vnet0
	port 5: vnet1

The problem depends on the fact that n_masks stats is stored as a
uint32 in the struct ovs_dp_megaflow_stats and as a uint64 in the
struct dpif_dp_stats. UINT32_MAX instead of UINT64_MAX should be
used to detect if the datapath supports megaflows or not.

Signed-off-by: Francesco Fusco <ffusco@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-17 20:18:18 +01:00
+								    stats->n_masks = UINT32_MAX;
-												dpif-linux: collect and display mega flow mask stats

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-10-21 14:37:34 -07:00
+								    stats->n_mask_hit = UINT64_MAX;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								/* Atomically adds 1 to the 'change_seq' of each of the 'dp->n_pmd_threads'
 								 * entries in 'dp->pmd_threads'.  (Presumably every PMD thread watches its
 								 * own 'change_seq' to learn that it must reload -- confirm against the
 								 * thread's main loop.) */
 								static void
 								dp_netdev_reload_pmd_threads(struct dp_netdev *dp)
 								{
 								    int idx = 0;
 								    while (idx < dp->n_pmd_threads) {
 								        struct pmd_thread *pmd = &dp->pmd_threads[idx];
 								        int old_seq;    /* Output slot for atomic_add(); not used here. */
 								        atomic_add(&pmd->change_seq, 1, &old_seq);
 								        idx++;
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								            odp_port_t port_no)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_WRLOCK(dp->port_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    struct netdev_saved_flags *sf;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dp_netdev_port *port;
 								    struct netdev *netdev;
-												dpif-netdev: Do not allow adding loopback devices

Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-07 12:35:15 +03:00
+								    enum netdev_flags flags;
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    const char *open_type;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    int error;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue IO.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    int i;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    /* XXX reject devices already in some dp_netdev. */
 								    /* Open and validate network device. */
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than the one it is configured as.  For example,
an "internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								    open_type = dpif_netdev_port_open_type(dp->class, type);
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    error = netdev_open(devname, open_type, &netdev);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
 								        return error;
 								    }
 								    /* XXX reject non-Ethernet devices */
-												dpif-netdev: Do not allow adding loopback devices

Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-07 12:35:15 +03:00
+								    netdev_get_flags(netdev, &flags);
 								    if (flags & NETDEV_LOOPBACK) {
 								        VLOG_ERR("%s: cannot add a loopback device", devname);
 								        netdev_close(netdev);
 								        return EINVAL;
 								    }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    port = xzalloc(sizeof *port);
 								    port->port_no = port_no;
 								    port->netdev = netdev;
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue IO.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    port->type = xstrdup(type);
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue IO.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    for (i = 0; i < netdev_n_rxq(netdev); i++) {
 								        error = netdev_rxq_open(netdev, &port->rxq[i], i);
 								        if (error
 								            && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
 								            VLOG_ERR("%s: cannot receive packets on this network device (%s)",
 								                     devname, ovs_strerror(errno));
 								            netdev_close(netdev);
 								            return error;
 								        }
-												netdev: Clean up and refactor packet receive interface.

The Open vSwitch tree only has one user of the ability for a netdev to
receive packets from a network device.  Thus, this commit simplifies the
common-case use of the netdev interface by replacing the "ethertype" option
from "struct netdev_options" by a new netdev_listen() call.

The only user of netdev_listen() wants to receive all packets from a
network device, so this commit also removes the ability to restrict the
received packets to a particular protocol.  (This ability was once used by
the Open vSwitch integrated DHCP client, but that code has been removed.)

This commit also simplifies and improves the implementation of the code
in netdev-linux that started listening to a network device.  Before, I had
not figured out how to avoid receiving all packets on all devices before
binding to a particular device, but I took a closer look at the kernel code
and figured it out.

I've tested that the userspace datapath (dpif-netdev), the only user of
netdev_recv(), still works after this change.

											
										
										
											2011-08-05 14:15:32 -07:00
+								    }
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    if (error) {
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue IO.  The following
patch adds support for it.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        for (i = 0; i < netdev_n_rxq(netdev); i++) {
 								            netdev_rxq_close(port->rxq[i]);
 								        }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        netdev_close(netdev);
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								        free(port->rxq);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        free(port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return error;
 								    }
-												netdev: Factor restoring flags into new "struct netdev_saved_flags".

This gets rid of the only per-instance data in "struct netdev", which
will make it possible to merge "struct netdev_dev" into "struct netdev" in
a later commit.

Ed Maste wrote the netdev-bsd changes in this commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ed Maste <emaste@freebsd.org>
Tested-by: Ed Maste <emaste@freebsd.org>

											
										
										
											2013-05-10 08:55:25 -07:00
+								    port->sf = sf;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD netdev type for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses the dpif-thread exclusively
for PMD devices; the rest of the devices, which need system calls to
do IO, are moved to dpif-netdev-run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								    if (netdev_is_pmd(netdev)) {
 								        dp->pmd_count++;
 								        dp_netdev_set_pmd_threads(dp, NR_THREADS);
 								        dp_netdev_reload_pmd_threads(dp);
 								    }
 								    ovs_refcount_init(&port->ref_cnt);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    return 0;
 								}
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								static int
 								dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                     odp_port_t *port_nop)
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
 								    const char *dpif_port;
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    odp_port_t port_no;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_wrlock(&dp->port_rwlock);
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								    dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    if (*port_nop != ODPP_NONE) {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        port_no = *port_nop;
 								        error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
-												dpif: Allow the port number to be requested when adding an interface.

The datapath allows requesting a specific port number for a port, but
the dpif interface didn't expose it.  This commit adds that support.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-07-27 23:58:24 -07:00
+								    } else {
-												netdev-vport: Don't return static data in netdev_vport_get_dpif_port().

Returning a static data buffer makes code more brittle and definitely
not thread-safe, so this commit switches to using a caller-provided
buffer instead.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-01 11:05:28 -07:00
+								        port_no = choose_port(dp, dpif_port);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = port_no == ODPP_NONE ? EFBIG : 0;
-												dpif: Allow the port number to be requested when adding an interface.

The datapath allows requesting a specific port number for a port, but
the dpif interface didn't expose it.  This commit adds that support.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-07-27 23:58:24 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    if (!error) {
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								        *port_nop = port_no;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
-												dpif-netdev: Make port numbers predictable for dummy dpif, for unit tests.

The unit tests feed a lot of flows through the ofproto-dpif "trace"
command, which means that they need to know the port numbers of the ports
that they create.  Until now, they've had to actually query those port
numbers from the database, which is a bit of unnecessary overhead for unit
tests.

This commit makes dummy dpif port numbers predictable: if the name of a
port contains a number, then the dummy dpif uses that number, if it is
valid and available, as the port number.

This commit also simplifies the unit tests that previously queried port
numbers to depend on the new behavior.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 15:23:23 -08:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Deletes the port numbered 'port_no' from the datapath behind 'dpif'.
 								 *
 								 * Holds dp->port_rwlock for writing around the deletion.  Returns EINVAL
 								 * without deleting anything if 'port_no' is ODPP_LOCAL; otherwise returns
 								 * whatever do_del_port() returns. */
 								static int
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_wrlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    /* A request to delete ODPP_LOCAL is rejected before do_del_port(). */
+								    error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Returns true if 'port_no' is anything other than ODPP_NONE, false if it
 								 * is ODPP_NONE.  No other range check is performed. */
 								static bool
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								is_valid_port_number(odp_port_t port_no)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    return port_no != ODPP_NONE;
 								}
 								/* Searches 'dp->ports' for the port whose 'port_no' member equals
 								 * 'port_no'.  Only the hash bucket selected by
 								 * hash_int(odp_to_u32(port_no), 0) is scanned.
 								 *
 								 * Returns the matching port, or NULL if there is none.  Annotated as
 								 * requiring dp->port_rwlock to be held at least for reading. */
 								static struct dp_netdev_port *
 								dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_RDLOCK(dp->port_rwlock)
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								{
 								    struct dp_netdev_port *port;
 								    HMAP_FOR_EACH_IN_BUCKET (port, node, hash_int(odp_to_u32(port_no), 0),
 								                             &dp->ports) {
 								        /* Different port numbers can share a bucket, so compare exactly. */
 								        if (port->port_no == port_no) {
 								            return port;
 								        }
 								    }
 								    return NULL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Looks up the port numbered 'port_no' in 'dp' and stores the result in
 								 * '*portp'.
 								 *
 								 * Returns 0 and sets '*portp' to the port if it exists.  Returns EINVAL
 								 * (with '*portp' set to NULL) if 'port_no' fails is_valid_port_number(),
 								 * or ENOENT (with '*portp' set to NULL) if no port has that number.
 								 * Annotated as requiring dp->port_rwlock to be held at least for
 								 * reading. */
 								static int
 								get_port_by_number(struct dp_netdev *dp,
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								                   odp_port_t port_no, struct dp_netdev_port **portp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_RDLOCK(dp->port_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    if (!is_valid_port_number(port_no)) {
 								        *portp = NULL;
 								        return EINVAL;
 								    } else {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        *portp = dp_netdev_lookup_port(dp, port_no);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return *portp ? 0 : ENOENT;
 								    }
 								}
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
 								/* Takes one additional reference on 'port' by incrementing
 								 * 'port->ref_cnt'.  Does nothing if 'port' is NULL. */
+								static void
 								port_ref(struct dp_netdev_port *port)
 								{
 								    if (port) {
 								        ovs_refcount_ref(&port->ref_cnt);
 								    }
 								}
 								/* Releases one reference on 'port'.  Does nothing if 'port' is NULL.
 								 *
 								 * When ovs_refcount_unref() reports that the count was 1 before this
 								 * call, the port is torn down: its netdev is closed, the saved netdev
 								 * flags in 'port->sf' are restored, every rx queue in 'port->rxq' is
 								 * closed, and 'port->type' and 'port' itself are freed. */
 								static void
 								port_unref(struct dp_netdev_port *port)
 								{
 								    if (port && ovs_refcount_unref(&port->ref_cnt) == 1) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        int i;
 								        /* Read the queue count while this port still holds its own
 								         * reference to the netdev.  After netdev_close() and the last
 								         * netdev_rxq_close() below, 'port->netdev' must not be
 								         * dereferenced again. */
 								        int n_rxq = netdev_n_rxq(port->netdev);
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								        netdev_close(port->netdev);
 								        netdev_restore_flags(port->sf);
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
 								        for (i = 0; i < n_rxq; i++) {
 								            netdev_rxq_close(port->rxq[i]);
 								        }
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								        free(port->type);
 								        free(port);
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								/* Finds the port in 'dp' whose netdev name (netdev_get_name()) is exactly
 								 * 'devname', by scanning every port in 'dp->ports'.
 								 *
 								 * On a match, stores the port in '*portp' and returns 0.  Otherwise
 								 * returns ENOENT and does not write to '*portp'.  Annotated as requiring
 								 * dp->port_rwlock to be held at least for reading. */
+								static int
 								get_port_by_name(struct dp_netdev *dp,
 								                 const char *devname, struct dp_netdev_port **portp)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_RDLOCK(dp->port_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev_port *port;
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
 								    /* The port map is hashed by port number, so a lookup by name has to
 								     * visit every port. */
+								    HMAP_FOR_EACH (port, node, &dp->ports) {
-												dpif-netdev: Don't run port names through netdev_vport_get_dpif_port().

The ports that exist within a dpif have already been translated through
netdev_vport_get_dpif_port(), so there is no value to translating them
again in the interfaces that query or dump ports (and possibly a drawback
if somehow the translation could change).

After this change, dpif-netdev translates port names in just one place,
the port_add path, which makes dpif-netdev act the same way as dpif-linux
in this respect.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-06-06 15:27:15 -07:00
+								        if (!strcmp(netdev_get_name(port->netdev), devname)) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            *portp = port;
 								            return 0;
 								        }
 								    }
 								    return ENOENT;
 								}
 								/* Removes the port numbered 'port_no' from 'dp'.
 								 *
 								 * Returns the error from get_port_by_number() if the lookup fails, in
 								 * which case nothing is changed.  Otherwise unlinks the port from
 								 * 'dp->ports', changes 'dp->port_seq', reloads the PMD threads if the
 								 * port's netdev satisfies netdev_is_pmd(), drops one reference to the
 								 * port with port_unref(), and returns 0.  Annotated as requiring
 								 * dp->port_rwlock to be held for writing. */
 								static int
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								do_del_port(struct dp_netdev *dp, odp_port_t port_no)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_WRLOCK(dp->port_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev_port *port;
 								    int error;
 								    error = get_port_by_number(dp, port_no, &port);
 								    if (error) {
 								        return error;
 								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    hmap_remove(&dp->ports, &port->node);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								    /* 'port->netdev' is inspected before port_unref() below, which may
 								     * free 'port'. */
+								    if (netdev_is_pmd(port->netdev)) {
 								        dp_netdev_reload_pmd_threads(dp);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Add ref-counting for port.

DPDK Poll mode thread need to keep ref to dpif-port.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:19 -07:00
+								    port_unref(port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return 0;
 								}
 								/* Fills in 'dpif_port' from 'port': 'name' becomes an xstrdup() copy of
 								 * the port's netdev name, 'type' an xstrdup() copy of 'port->type', and
 								 * 'port_no' the port's number.  'port' itself is not modified. */
 								static void
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								answer_port_query(const struct dp_netdev_port *port,
 								                  struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Don't run port names through netdev_vport_get_dpif_port().

The ports that exist within a dpif have already been translated through
netdev_vport_get_dpif_port(), so there is no value to translating them
again in the interfaces that query or dump ports (and possibly a drawback
if somehow the translation could change).

After this change, dpif-netdev translates port names in just one place,
the port_add path, which makes dpif-netdev act the same way as dpif-linux
in this respect.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-06-06 15:27:15 -07:00
+								    dpif_port->name = xstrdup(netdev_get_name(port->netdev));
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    dpif_port->type = xstrdup(port->type);
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    dpif_port->port_no = port->port_no;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								/* Queries the datapath behind 'dpif' for the port numbered 'port_no'.
 								 *
 								 * Holds dp->port_rwlock for reading during the lookup.  Returns the
 								 * result of get_port_by_number(): 0 if the port exists, otherwise a
 								 * positive errno value.  On success, if 'dpif_port' is nonnull it is
 								 * filled in by answer_port_query(); 'dpif_port' may be NULL when the
 								 * caller only wants to know whether the port exists. */
 								static int
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                                 struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    struct dp_netdev_port *port;
 								    int error;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_rdlock(&dp->port_rwlock);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    error = get_port_by_number(dp, port_no, &port);
-												dpif: Add new dpif_port_exists() function.

Provide the ability to determine whether a port exists in a datapath
without having to deal with a "dpif_port" structure as with
dpif_port_query_by_name().  A future patch will use this function.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-17 23:11:53 -07:00
 								    /* The answer is copied out while the lock is still held. */
+								    if (!error && dpif_port) {
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								        answer_port_query(port, dpif_port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return error;
 								}
 								static int
 								dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                               struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    struct dp_netdev_port *port;
 								    int error;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_rdlock(&dp->port_rwlock);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    error = get_port_by_name(dp, devname, &port);
-												dpif: Add new dpif_port_exists() function.

Provide the ability to determine whether a port exists in a datapath
without having to deal with a "dpif_port" structure as with
dpif_port_query_by_name().  A future patch will use this function.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-17 23:11:53 -07:00
+								    if (!error && dpif_port) {
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								        answer_port_query(port, dpif_port);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return error;
 								}
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								static void
 								dp_netdev_flow_free(struct dp_netdev_flow *flow)
 								{
 								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
 								        ovs_mutex_destroy(&bucket->mutex);
 								        free_cacheline(bucket);
 								    }
 								    ovsthread_stats_destroy(&flow->stats);
 								    cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
 								    dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
 								    ovs_mutex_destroy(&flow->mutex);
 								    free(flow);
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
 								    OVS_REQ_WRLOCK(dp->cls.rwlock)
 								    OVS_REQUIRES(dp->flow_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
 								    struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    classifier_remove(&dp->cls, cr);
 								    hmap_remove(&dp->flow_table, node);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    ovsrcu_postpone(dp_netdev_flow_free, flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
 								dp_netdev_flow_flush(struct dp_netdev *dp)
 								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow, *next;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_wrlock(&dp->cls.rwlock);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        dp_netdev_remove_flow(dp, netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
 								dpif_netdev_flow_flush(struct dpif *dpif)
 								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dp_netdev_flow_flush(dp);
 								    return 0;
 								}
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								struct dp_netdev_port_state {
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    uint32_t bucket;
 								    uint32_t offset;
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    char *name;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								};
 								static int
 								dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
 								{
 								    *statep = xzalloc(sizeof(struct dp_netdev_port_state));
 								    return 0;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                           struct dpif_port *dpif_port)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    struct dp_netdev_port_state *state = state_;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    struct hmap_node *node;
 								    int retval;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_rdlock(&dp->port_rwlock);
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    node = hmap_at_position(&dp->ports, &state->bucket, &state->offset);
 								    if (node) {
 								        struct dp_netdev_port *port;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								        port = CONTAINER_OF(node, struct dp_netdev_port, node);
 								        free(state->name);
 								        state->name = xstrdup(netdev_get_name(port->netdev));
 								        dpif_port->name = state->name;
 								        dpif_port->type = port->type;
 								        dpif_port->port_no = port->port_no;
 								        retval = 0;
 								    } else {
 								        retval = EOF;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    return retval;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								}
 								static int
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								{
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    struct dp_netdev_port_state *state = state_;
 								    free(state->name);
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    free(state);
 								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												Rename UNUSED macro to OVS_UNUSED to avoid naming conflict.

Requested by Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-11 10:59:47 -08:00
+								dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    uint64_t new_port_seq;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    int error;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    new_port_seq = seq_read(dpif->dp->port_seq);
 								    if (dpif->last_port_seq != new_port_seq) {
 								        dpif->last_port_seq = new_port_seq;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOBUFS;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = EAGAIN;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
 								dpif_netdev_port_poll_wait(const struct dpif *dpif_)
 								{
 								    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								}
 								static struct dp_netdev_flow *
 								dp_netdev_flow_cast(const struct cls_rule *cr)
 								{
 								    return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static struct dp_netdev_flow *
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_EXCLUDED(dp->cls.rwlock)
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								{
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_rdlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    return netdev_flow;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								}
 								static struct dp_netdev_flow *
 								dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQ_RDLOCK(dp->cls.rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								                             &dp->flow_table) {
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        if (flow_equal(&netdev_flow->flow, flow)) {
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            return netdev_flow;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    return NULL;
 								}
 								static void
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
 								                    struct dpif_flow_stats *stats)
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								{
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    memset(stats, 0, sizeof *stats);
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        stats->n_packets += bucket->packet_count;
 								        stats->n_bytes += bucket->byte_count;
 								        stats->used = MAX(stats->used, bucket->used);
 								        stats->tcp_flags |= bucket->tcp_flags;
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								static int
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
 								                              const struct nlattr *mask_key,
 								                              uint32_t mask_key_len, const struct flow *flow,
 								                              struct flow *mask)
 								{
 								    if (mask_key_len) {
-												dpif-netdev: Make a log message more detailed.

This would have helped me track down a bug I was hunting just now.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-02-04 08:07:45 -08:00
+								        enum odp_key_fitness fitness;
 								        fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
 								        if (fitness) {
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            /* This should not happen: it indicates that
 								             * odp_flow_key_from_mask() and odp_flow_key_to_mask()
 								             * disagree on the acceptable form of a mask.  Log the problem
 								             * as an error, with enough details to enable debugging. */
 								            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 								            if (!VLOG_DROP_ERR(&rl)) {
 								                struct ds s;
 								                ds_init(&s);
 								                odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
 								                                true);
-												dpif-netdev: Make a log message more detailed.

This would have helped me track down a bug I was hunting just now.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-02-04 08:07:45 -08:00
+								                VLOG_ERR("internal error parsing flow mask %s (%s)",
 								                         ds_cstr(&s), odp_key_fitness_to_string(fitness));
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								                ds_destroy(&s);
 								            }
 								            return EINVAL;
 								        }
 								        /* Force unwildcard the in_port. */
 								        mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
 								    } else {
 								        enum mf_field_id id;
 								        /* No mask key, unwildcard everything except fields whose
 								         * prerequisites are not met. */
 								        memset(mask, 0x0, sizeof *mask);
 								        for (id = 0; id < MFF_N_IDS; ++id) {
 								            /* Skip registers and metadata. */
 								            if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
 								                && id != MFF_METADATA) {
 								                const struct mf_field *mf = mf_from_id(id);
 								                if (mf_are_prereqs_ok(mf, flow)) {
 								                    mf_mask_field(mf, mask);
 								                }
 								            }
 								        }
 								    }
 								    return 0;
 								}
 								static int
 								dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
 								                              struct flow *flow)
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								{
-												dpif-netdev: Make "packet-out" with in_port=OFPP_CONTROLLER work again.

Commit 4e022ec09e14 (Create specific types for ofp and odp port) broke
OpenFlow OFPP_PACKET_OUT requests that use in_port=OFPP_CONTROLLER.  This
commit fixes the problem and adds a regression test.

CC: Alex Wang <alexw@nicira.com>
Reported-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-09 09:23:02 -07:00
+								    odp_port_t in_port;
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    if (odp_flow_key_to_flow(key, key_len, flow)) {
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								        /* This should not happen: it indicates that odp_flow_key_from_flow()
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								         * and odp_flow_key_to_flow() disagree on the acceptable form of a
 								         * flow.  Log the problem as an error, with enough details to enable
 								         * debugging. */
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 								        if (!VLOG_DROP_ERR(&rl)) {
 								            struct ds s;
 								            ds_init(&s);
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new data path flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
 								            ds_destroy(&s);
 								        }
 								        return EINVAL;
 								    }
-												dpif-netdev: Make "packet-out" with in_port=OFPP_CONTROLLER work again.

Commit 4e022ec09e14 (Create specific types for ofp and odp port) broke
OpenFlow OFPT_PACKET_OUT requests that use in_port=OFPP_CONTROLLER.  This
commit fixes the problem and adds a regression test.

CC: Alex Wang <alexw@nicira.com>
Reported-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-09 09:23:02 -07:00
+								    in_port = flow->in_port.odp_port;
 								    if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
-												datapath: Allow a packet with no input port to omit OVS_KEY_ATTR_IN_PORT.

When ovs-vswitchd executes actions on a synthesized packet, that is, on a
packet that is not being forwarded from any particular port but is being
generated by ovs-vswitchd itself or by an OpenFlow controller (using an
OFPT_PACKET_OUT message with an in_port of OFPP_NONE), there is no good
choice for the in_port to pass to the kernel in the flow in the
OVS_PACKET_CMD_EXECUTE message.  This commit allows ovs-vswitchd to omit
the in_port entirely in this case.

This fixes a bug in OFPT_PACKET_OUT: using an in_port of OFPP_NONE would
cause the packet to be dropped by the kernel, since that's an invalid
input port.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Reported-by: Aaron Rosen <arosen@clemson.edu>

											
										
										
											2011-09-08 16:30:20 -07:00
+								        return EINVAL;
 								    }
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    return 0;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												datapath: Eliminate 'flags' member from odp_flow.

Nothing was productively using the 'flags' member of odp_flow, so this
commit removes it.

ODPFF_ZERO_TCP_FLAGS isn't used at all (as of the previous commit).

ODPFF_EOF has been replaced by a special case of the 'key_len' member.
This will go away, too, once AF_NETLINK starts being used.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:43:30 -08:00
+								dpif_netdev_flow_get(const struct dpif *dpif,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                     const struct nlattr *nl_key, size_t nl_key_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                     struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
+								    struct flow key;
 								    int error;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
+								    if (error) {
 								        return error;
 								    }
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_rdlock(&dp->cls.rwlock);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    netdev_flow = dp_netdev_find_flow(dp, &key);
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (netdev_flow) {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        if (stats) {
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								            get_dpif_flow_stats(netdev_flow, stats);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        }
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        if (actionsp) {
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            struct dp_netdev_actions *actions;
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            actions = dp_netdev_flow_get_actions(netdev_flow);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								            *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        }
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								     } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												datapath: Change ODP_FLOW_GET to retrieve only a single flow at a time.

This brings the code closer to what the Netlink interface will need to
implement.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:40:58 -08:00
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
 								                   const struct flow_wildcards *wc,
 								                   const struct nlattr *actions,
 								                   size_t actions_len)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_REQUIRES(dp->flow_mutex)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    struct match match;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    netdev_flow = xzalloc(sizeof *netdev_flow);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
 								    ovs_mutex_init(&netdev_flow->mutex);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    ovsthread_stats_init(&netdev_flow->stats);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    ovsrcu_set(&netdev_flow->actions,
 								               dp_netdev_actions_create(actions, actions_len));
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
 								    match_init(&match, flow, wc);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
 								                  &match, NETDEV_RULE_PRIORITY);
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_wrlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    classifier_insert(&dp->cls,
 								                      CONST_CAST(struct cls_rule *, &netdev_flow->cr));
 								    hmap_insert(&dp->flow_table,
 								                CONST_CAST(struct hmap_node *, &netdev_flow->node),
 								                flow_hash(flow, 0));
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
 								    return 0;
 								}
 								static void
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								clear_stats(struct dp_netdev_flow *netdev_flow)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    struct dp_netdev_flow_stats *bucket;
 								    size_t i;
 								    OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
 								        ovs_mutex_lock(&bucket->mutex);
 								        bucket->used = 0;
 								        bucket->packet_count = 0;
 								        bucket->byte_count = 0;
 								        bucket->tcp_flags = 0;
 								        ovs_mutex_unlock(&bucket->mutex);
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    struct flow flow;
 								    struct flow_wildcards wc;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    int error;
-												dpif-netdev: Properly create exact match masks.

Normally OVS userspace supplies a mask along with a flow key for each
new datapath flow that should be created.  OVS also provides an
option to disable the kernel wildcarding, in which case the flows are
created without a mask.  When kernel wildcarding is disabled, the
datapath should use exact match, i.e. not wildcard any bits in the
flow key.  Currently, what happens with the userspace datapath instead
is that a datapath flow with mostly empty mask is created (i.e., most
fields are wildcarded), as the current code does not examine the given
mask key length to find out that the mask key is actually empty.  This
results in the same datapath flow matching on packets of multiple
different flows, wrong actions being processed, and stats being
incorrect.

This patch refactors userspace datapath code to explicitly initialize
a suitable exact match mask when a flow put without a mask is
executed.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-11 11:07:01 -08:00
+								    error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
 								    if (error) {
 								        return error;
 								    }
 								    error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
 								                                          put->mask, put->mask_len,
 								                                          &flow, &wc.masks);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    if (error) {
 								        return error;
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    netdev_flow = dp_netdev_lookup_flow(dp, &flow);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (!netdev_flow) {
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								        if (put->flags & DPIF_FP_CREATE) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								                if (put->stats) {
 								                    memset(put->stats, 0, sizeof *put->stats);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                }
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								                error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								                                           put->actions_len);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								                error = EFBIG;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            }
 								        } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								            error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    } else {
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        if (put->flags & DPIF_FP_MODIFY
 								            && flow_equal(&flow, &netdev_flow->flow)) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								            struct dp_netdev_actions *new_actions;
 								            struct dp_netdev_actions *old_actions;
 								            new_actions = dp_netdev_actions_create(put->actions,
 								                                                   put->actions_len);
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            old_actions = dp_netdev_flow_get_actions(netdev_flow);
 								            ovsrcu_set(&netdev_flow->actions, new_actions);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								            if (put->stats) {
 								                get_dpif_flow_stats(netdev_flow, put->stats);
 								            }
 								            if (put->flags & DPIF_FP_ZERO_STATS) {
 								                clear_stats(netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								            }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            ovsrcu_postpone(dp_netdev_actions_free, old_actions);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        } else if (put->flags & DPIF_FP_CREATE) {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								            error = EEXIST;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        } else {
 								            /* Overlapping flow. */
 								            error = EINVAL;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        }
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
+								    struct flow key;
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    int error;
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								    error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
+								    if (error) {
 								        return error;
 								    }
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp->flow_mutex);
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_wrlock(&dp->cls.rwlock);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    netdev_flow = dp_netdev_find_flow(dp, &key);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    if (netdev_flow) {
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								        if (del->stats) {
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								            get_dpif_flow_stats(netdev_flow, del->stats);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        dp_netdev_remove_flow(dp, netdev_flow);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = ENOENT;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												classifier: Use fat_rwlock instead of ovs_rwlock.

Jarno Rajahalme reported up to 40% performance gain on netperf TCP_CRR with
an earlier version of this patch in combination with a kernel NUMA patch,
together with a reduction in variance:
    http://openvswitch.org/pipermail/dev/2014-January/035867.html

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-13 11:21:12 -08:00
+								    fat_rwlock_unlock(&dp->cls.rwlock);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_unlock(&dp->flow_mutex);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								struct dp_netdev_flow_state {
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								    struct dp_netdev_actions *actions;
-												odp-util: Replace ODPUTIL_FLOW_KEY_U32S by new struct odputil_keybuf.

This seems to me to better encapsulate the inherent ugliness.

											
										
										
											2011-03-02 13:25:10 -08:00
+								    struct odputil_keybuf keybuf;
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    struct odputil_keybuf maskbuf;
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								    struct dpif_flow_stats stats;
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								};
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								struct dp_netdev_flow_iter {
 								    uint32_t bucket;
 								    uint32_t offset;
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    int status;
 								    struct ovs_mutex mutex;
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								};
 								static void
 								dpif_netdev_flow_dump_state_init(void **statep)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    struct dp_netdev_flow_state *state;
 								    *statep = state = xmalloc(sizeof *state);
 								    state->actions = NULL;
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								}
 								/* Frees the per-thread flow dump state 'state_', i.e. the object that
 								 * dpif_netdev_flow_dump_state_init() allocates with xmalloc().  Only the
 								 * state object itself is released here; whatever its 'actions' member
 								 * points to is left untouched. */
 								static void
 								dpif_netdev_flow_dump_state_uninit(void *state_)
 								{
 								    free(state_);
 								}
 								static int
 								dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **iterp)
 								{
 								    struct dp_netdev_flow_iter *iter;
 								    *iterp = iter = xmalloc(sizeof *iter);
 								    iter->bucket = 0;
 								    iter->offset = 0;
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    iter->status = 0;
 								    ovs_mutex_init(&iter->mutex);
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    return 0;
 								}
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								/* XXX the caller must use 'actions' without quiescing */
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								static int
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                           const struct nlattr **key, size_t *key_len,
-												ovs-dpctl: Add mega flow support

Added support to allow mega flows to be specified and displayed.  The
ovs-dpctl tool is mainly used as a debugging tool.

This patch also implements the low-level user space routines to send
and receive mega flow netlink messages. Those netlink support
routines are required for forthcoming user space mega flow patches.

Added a unit test to test parsing and display of mega flows.

Ethan contributed the ovs-dpctl mega flow output function.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 07:15:10 +00:00
+								                           const struct nlattr **mask, size_t *mask_len,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                           const struct nlattr **actions, size_t *actions_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                           const struct dpif_flow_stats **stats)
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								{
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								    struct dp_netdev_flow_iter *iter = iter_;
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    struct dp_netdev_flow_state *state = state_;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    int error;
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    ovs_mutex_lock(&iter->mutex);
 								    error = iter->status;
 								    if (!error) {
 								        struct hmap_node *node;
 								        fat_rwlock_rdlock(&dp->cls.rwlock);
 								        node = hmap_at_position(&dp->flow_table, &iter->bucket, &iter->offset);
 								        if (node) {
 								            netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
 								        }
 								        fat_rwlock_unlock(&dp->cls.rwlock);
 								        if (!node) {
 								            iter->status = error = EOF;
 								        }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    }
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    ovs_mutex_unlock(&iter->mutex);
 								    if (error) {
 								        return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    if (key) {
 								        struct ofpbuf buf;
-												odp-util: Replace ODPUTIL_FLOW_KEY_U32S by new struct odputil_keybuf.

This seems to me to better encapsulate the inherent ugliness.

											
										
										
											2011-03-02 13:25:10 -08:00
+								        ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								        odp_flow_key_from_flow(&buf, &netdev_flow->flow,
 								                               netdev_flow->flow.in_port.odp_port);
-												datapath: Convert odp_flow_key to use Netlink attributes instead.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This
commit makes that change using Netlink attribute sequences.

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, because userspace doesn't yet
have enough information to do that intelligently.  Upcoming commits will
fix that.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 18:44:44 -08:00
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        *key = buf.data;
 								        *key_len = buf.size;
 								    }
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
+								    if (key && mask) {
 								        struct ofpbuf buf;
 								        struct flow_wildcards wc;
 								        ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
 								        minimask_expand(&netdev_flow->cr.match.mask, &wc);
 								        odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
-												Enhance userspace support for MPLS, for up to 3 labels.

This commit makes the userspace support for MPLS more complete.  Now
up to 3 labels are supported.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Co-authored-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Simon Horman <horms@verge.net.au>

											
										
										
											2014-02-04 10:32:35 -08:00
+								                               odp_to_u32(wc.masks.in_port.odp_port),
 								                               SIZE_MAX);
-												dpif-netdev: Introduce a classifier in userspace datapath.

Instead of an exact match flow table, we introduce a classifier.
This enables mega-flows in userspace datapath.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
[blp@nicira.com tweaked flow lookup]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-04 06:23:54 -08:00
 								        *mask = buf.data;
 								        *mask_len = buf.size;
-												ovs-dpctl: Add mega flow support

Added support to allow mega flows to be specified and displayed.  The
ovs-dpctl tool is mainly used as a debugging tool.

This patch also implements the low-level user space routines to send
and receive mega flow netlink messages. Those netlink support
routines are required for forthcoming user space mega flow patches.

Added a unit test to test parsing and display of mega flows.

Ethan contributed the ovs-dpctl mega flow output function.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 07:15:10 +00:00
+								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    if (actions || stats) {
 								        state->actions = NULL;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        if (actions) {
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								            state->actions = dp_netdev_flow_get_actions(netdev_flow);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								            *actions = state->actions->actions;
 								            *actions_len = state->actions->size;
 								        }
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        if (stats) {
 								            get_dpif_flow_stats(netdev_flow, &state->stats);
 								            *stats = &state->stats;
 								        }
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    }
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
 								    return 0;
 								}
 								static int
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *iter_)
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								{
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								    struct dp_netdev_flow_iter *iter = iter_;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								    ovs_mutex_destroy(&iter->mutex);
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								    free(iter);
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    struct pkt_metadata *md = &execute->md;
 								    struct flow key;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif: Change provider interface to consistently use operation structs.

Until now, a "flow put" has represented its parameters in two different
ways, depending on whether it was coming from dpif_flow_put() or from
dpif_operate(), and similarly for an "execute" operation.  This commit
adopts the operation struct consistently within the dpif provider
interface, which seems cleaner.

This commit also factors out logging for flow puts and executes, which
is useful in the following commit.

This doesn't change the dpif client interface, since the two forms are
more convenient for clients than always filling out an operation struct.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:39:03 -08:00
+								    if (execute->packet->size < ETH_HEADER_LEN ||
 								        execute->packet->size > UINT16_MAX) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        return EINVAL;
 								    }
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    /* Extract flow key. */
-												lib: simplify flow_extract() API

Change the flow_extract() API to accept struct pkt_metadata,
instead of individual metadata fields. It will make the API more
logical and easier to maintain when we need to expand metadata
down the road.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-02-26 18:08:04 -08:00
+								    flow_extract(execute->packet, md, &key);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								    ovs_rwlock_rdlock(&dp->port_rwlock);
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets but current netdev API does
not allow that.  Following patch allows dpif-netdev receive batch
of packet in a rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								    dp_netdev_execute_actions(dp, &key, execute->packet, false, md,
 								                              execute->actions, execute->actions_len);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_unlock(&dp->port_rwlock);
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								static void
 								dp_netdev_destroy_all_queues(struct dp_netdev *dp)
 								    OVS_REQ_WRLOCK(dp->queue_rwlock)
 								{
 								    /* Tears down all of 'dp''s handler queues: purges them with
 								     * dp_netdev_purge_queues(), destroys each queue's mutex and seq,
 								     * frees the queue array, and leaves 'dp->handler_queues' NULL and
 								     * 'dp->n_handlers' zero.
 								     *
 								     * Annotated as requiring 'dp->queue_rwlock' to be held for
 								     * writing by the caller. */
 								    size_t i;
 								    dp_netdev_purge_queues(dp);
 								    for (i = 0; i < dp->n_handlers; i++) {
 								        struct dp_netdev_queue *q = &dp->handler_queues[i];
 								        ovs_mutex_destroy(&q->mutex);
 								        seq_destroy(q->seq);
 								    }
 								    /* Reset the pointer and count after freeing so that later checks
 								     * of 'dp->handler_queues' against NULL see the queues as gone. */
 								    free(dp->handler_queues);
 								    dp->handler_queues = NULL;
 								    dp->n_handlers = 0;
 								}
 								/* Makes 'dp' have exactly 'n_handlers' handler queues.
 								 *
 								 * Does nothing if 'dp->n_handlers' already equals 'n_handlers'.
 								 * Otherwise tears down the existing queues with
 								 * dp_netdev_destroy_all_queues(), allocates a new array of 'n_handlers'
 								 * queues with xzalloc(), and initializes each queue's mutex and seq.
 								 *
 								 * Annotated as requiring 'dp->queue_rwlock' to be held for writing by
 								 * the caller. */
 								static void
 								dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers)
 								    OVS_REQ_WRLOCK(dp->queue_rwlock)
 								{
 								    if (dp->n_handlers != n_handlers) {
 								        size_t i;
 								        /* The old queues are destroyed rather than resized; the
 								         * destroy helper also resets 'dp->n_handlers' to 0, so it is
 								         * assigned again below. */
 								        dp_netdev_destroy_all_queues(dp);
 								        dp->n_handlers = n_handlers;
 								        dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues);
 								        for (i = 0; i < n_handlers; i++) {
 								            struct dp_netdev_queue *q = &dp->handler_queues[i];
 								            ovs_mutex_init(&q->mutex);
 								            q->seq = seq_create();
 								        }
 								    }
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static int
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								dpif_netdev_recv_set(struct dpif *dpif, bool enable)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    if ((dp->handler_queues != NULL) == enable) {
 								        return 0;
 								    }
 								    fat_rwlock_wrlock(&dp->queue_rwlock);
 								    if (!enable) {
 								        dp_netdev_destroy_all_queues(dp);
 								    } else {
 								        dp_netdev_refresh_queues(dp, 1);
 								    }
 								    fat_rwlock_unlock(&dp->queue_rwlock);
-												Eliminate ODPL_* from userspace-facing interface.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:14:04 -08:00
+								    return 0;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								static int
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers)
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								{
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    fat_rwlock_wrlock(&dp->queue_rwlock);
 								    if (dp->handler_queues) {
 								        dp_netdev_refresh_queues(dp, n_handlers);
 								    }
 								    fat_rwlock_unlock(&dp->queue_rwlock);
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								    return 0;
 								}
-												dpif-netdev: Allow enqueue actions.

The dpif-netdev implementation disallowed enqueue actions because
it did not support conversion from OVS 'queue_id' to dpif
'priority'.  For testing purposes, this patch allows queues which
translate into NOOPs.

											
										
										
											2011-11-21 13:36:17 -08:00
+								static int
 								dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
 								                              uint32_t queue_id, uint32_t *priority)
 								{
 								    /* Translates 'queue_id' into a priority by storing it unchanged in
 								     * '*priority'.  'dpif' is not used.  Always returns 0. */
 								    *priority = queue_id;
 								    return 0;
 								}
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								static bool
 								dp_netdev_recv_check(const struct dp_netdev *dp, const uint32_t handler_id)
 								    OVS_REQ_RDLOCK(dp->queue_rwlock)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    if (!dp->handler_queues) {
 								        VLOG_WARN_RL(&rl, "receiving upcall disabled");
 								        return false;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
 								    if (handler_id >= dp->n_handlers) {
 								        VLOG_WARN_RL(&rl, "handler index out of bound");
 								        return false;
 								    }
 								    return true;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static int
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								dpif_netdev_recv(struct dpif *dpif, uint32_t handler_id,
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								                 struct dpif_upcall *upcall, struct ofpbuf *buf)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								    struct dp_netdev_queue *q;
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    int error = 0;
 								    fat_rwlock_rdlock(&dp->queue_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    if (!dp_netdev_recv_check(dp, handler_id)) {
 								        error = EAGAIN;
 								        goto out;
 								    }
 								    q = &dp->handler_queues[handler_id];
 								    ovs_mutex_lock(&q->mutex);
 								    if (q->head != q->tail) {
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
+								        struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
 								        *upcall = u->upcall;
-												queue: Get rid of ovs_queue data structure.

ovs_queue doesn't seem very useful; it's just a singly-linked list.  It's
more generally useful to use a general-purpose "struct list" for lists of
packets, so this commit adds such a member to "struct ofpbuf" and shifts
the existing users to use it.

											
										
										
											2010-12-06 10:03:31 -08:00
-												dpif: Make caller of dpif_recv() provide buffer space.

This improves performance under heavy flow setup loads.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-06 16:23:28 -07:00
+								        ofpbuf_uninit(buf);
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
+								        *buf = u->buf;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    } else {
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
+								        error = EAGAIN;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    ovs_mutex_unlock(&q->mutex);
 								out:
 								    fat_rwlock_unlock(&dp->queue_rwlock);
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								static void
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    struct dp_netdev *dp = get_dp_netdev(dpif);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    struct dp_netdev_queue *q;
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    uint64_t seq;
-												dpif-netdev: Make internally thread-safe by introducing a global mutex.

This can be improved later but it is the simple thing to do for now.

I marked a couple of races with XXX.  I don't have a really good solution
for these, but I hope to find one.  They may be harmless in practice.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-23 16:56:26 -07:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_rdlock(&dp->queue_rwlock);
 								    if (!dp_netdev_recv_check(dp, handler_id)) {
 								        goto out;
 								    }
 								    q = &dp->handler_queues[handler_id];
 								    ovs_mutex_lock(&q->mutex);
 								    seq = seq_read(q->seq);
 								    if (q->head != q->tail) {
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								        poll_immediate_wake();
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    } else {
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        seq_wait(q->seq, seq);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
 								    ovs_mutex_unlock(&q->mutex);
 								out:
 								    fat_rwlock_unlock(&dp->queue_rwlock);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
 								static void
 								dpif_netdev_recv_purge(struct dpif *dpif)
 								{
 								    struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_wrlock(&dpif_netdev->dp->queue_rwlock);
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    dp_netdev_purge_queues(dpif_netdev->dp);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads read upcall.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thusly the current implementation will be put in testing and
investigation for better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_unlock(&dpif_netdev->dp->queue_rwlock);
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								/* Creates and returns a new 'struct dp_netdev_actions' whose actions are a
 								 * copy of the 'size' bytes of 'actions'.
 								 *
 								 * The returned structure keeps its own copy of the actions, so the caller
 								 * retains ownership of 'actions'.  dp_netdev_actions_free() releases both
 								 * the structure and its copy of the actions. */
 								struct dp_netdev_actions *
 								dp_netdev_actions_create(const struct nlattr *actions, size_t size)
 								{
 								    struct dp_netdev_actions *netdev_actions;
 								    netdev_actions = xmalloc(sizeof *netdev_actions);
 								    netdev_actions->actions = xmemdup(actions, size);
 								    netdev_actions->size = size;
 								    return netdev_actions;
 								}
 								struct dp_netdev_actions *
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								{
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								}
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								static void
 								dp_netdev_actions_free(struct dp_netdev_actions *actions)
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								{
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								    free(actions->actions);
 								    free(actions);
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
 								inline static void
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								dp_netdev_process_rxq_port(struct dp_netdev *dp,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								                          struct dp_netdev_port *port,
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								                          struct netdev_rxq *rxq)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								{
 								    struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
 								    int error, c;
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								    error = netdev_rxq_recv(rxq, packet, &c);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    if (!error) {
 								        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
 								        int i;
 								        for (i = 0; i < c; i++) {
 								            dp_netdev_port_input(dp, packet[i], &md);
 								        }
 								    } else if (error != EAGAIN && error != EOPNOTSUPP) {
 								        static struct vlog_rate_limit rl
 								            = VLOG_RATE_LIMIT_INIT(1, 5);
 								        VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
 								                    netdev_get_name(port->netdev),
 								                    ovs_strerror(error));
 								    }
 								}
 								static void
 								dpif_netdev_run(struct dpif *dpif)
 								{
 								    struct dp_netdev_port *port;
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    ovs_rwlock_rdlock(&dp->port_rwlock);
 								    HMAP_FOR_EACH (port, node, &dp->ports) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        if (!netdev_is_pmd(port->netdev)) {
 								            int i;
 								            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
 								                dp_netdev_process_rxq_port(dp, port, port->rxq[i]);
 								            }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								    }
 								    ovs_rwlock_unlock(&dp->port_rwlock);
 								}
 								static void
 								dpif_netdev_wait(struct dpif *dpif)
 								{
 								    struct dp_netdev_port *port;
 								    struct dp_netdev *dp = get_dp_netdev(dpif);
 								    ovs_rwlock_rdlock(&dp->port_rwlock);
 								    HMAP_FOR_EACH (port, node, &dp->ports) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								        if (!netdev_is_pmd(port->netdev)) {
 								            int i;
 								            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
 								                netdev_rxq_wait(port->rxq[i]);
 								            }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								    }
 								    ovs_rwlock_unlock(&dp->port_rwlock);
 								}
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								struct rxq_poll {
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    struct dp_netdev_port *port;
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								    struct netdev_rxq *rx;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								};
 								static int
 								pmd_load_queues(struct pmd_thread *f,
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								                struct rxq_poll **ppoll_list, int poll_cnt)
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								{
 								    struct dp_netdev *dp = f->dp;
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								    struct rxq_poll *poll_list = *ppoll_list;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    struct dp_netdev_port *port;
 								    int id = f->id;
 								    int index;
 								    int i;
 								    /* Simple scheduler for netdev rx polling. */
 								    ovs_rwlock_rdlock(&dp->port_rwlock);
 								    for (i = 0; i < poll_cnt; i++) {
 								         port_unref(poll_list[i].port);
 								    }
 								    poll_cnt = 0;
 								    index = 0;
 								    HMAP_FOR_EACH (port, node, &f->dp->ports) {
 								        if (netdev_is_pmd(port->netdev)) {
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								            int i;
 								            for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
 								                if ((index % dp->n_pmd_threads) == id) {
 								                    poll_list = xrealloc(poll_list, sizeof *poll_list * (poll_cnt + 1));
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												netdev: Add support multiqueue recv.

new netdev type like DPDK can support multi-queue IO. Following
patch Adds support for same.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								                    port_ref(port);
 								                    poll_list[poll_cnt].port = port;
 								                    poll_list[poll_cnt].rx = port->rxq[i];
 								                    poll_cnt++;
 								                }
 								                index++;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								            }
 								        }
 								    }
 								    ovs_rwlock_unlock(&dp->port_rwlock);
 								    *ppoll_list = poll_list;
 								    return poll_cnt;
 								}
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								static void *
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								pmd_thread_main(void *f_)
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								{
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    struct pmd_thread *f = f_;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    struct dp_netdev *dp = f->dp;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    unsigned int lc = 0;
-												netdev: Rename netdev_rx to netdev_rxq

Preparation for multi queue netdev IO.  There are no functional changes
in this patch.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 19:38:14 -07:00
+								    struct rxq_poll *poll_list;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    unsigned int port_seq;
 								    int poll_cnt;
 								    int i;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    f->name = xasprintf("pmd_%u", ovsthread_id_self());
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    set_subprogram_name("%s", f->name);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    poll_cnt = 0;
 								    poll_list = NULL;
-												netdev-dpdk: Use multiple core for dpdk IO.

DPDK requires _lcore_id to be set in order to use multiple cores.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 22:07:44 -07:00
+								    pmd_thread_setaffinity_cpu(f->id);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								reload:
 								    poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);
 								    atomic_read(&f->change_seq, &port_seq);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    for (;;) {
 								        unsigned int c_port_seq;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								        int i;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        for (i = 0; i < poll_cnt; i++) {
-												netdev: Add support multiqueue recv.

New netdev types like DPDK can support multi-queue IO.  The following
patch adds support for this.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 20:52:06 -07:00
+								            dp_netdev_process_rxq_port(dp,  poll_list[i].port, poll_list[i].rx);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        }
 								        if (lc++ > 1024) {
 								            ovsrcu_quiesce();
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								            /* TODO: need completely userspace based signaling method.
 								             * to keep this thread entirely in userspace.
 								             * For now using atomic counter. */
 								            lc = 0;
 								            atomic_read_explicit(&f->change_seq, &c_port_seq, memory_order_consume);
 								            if (c_port_seq != port_seq) {
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								                break;
 								            }
 								        }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    if (!latch_is_set(&f->dp->exit_latch)){
 								        goto reload;
 								    }
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    for (i = 0; i < poll_cnt; i++) {
 								         port_unref(poll_list[i].port);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    free(poll_list);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    free(f->name);
 								    return NULL;
 								}
 								static void
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								{
 								    int i;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    if (n == dp->n_pmd_threads) {
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								        return;
 								    }
 								    /* Stop existing threads. */
 								    latch_set(&dp->exit_latch);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    dp_netdev_reload_pmd_threads(dp);
 								    for (i = 0; i < dp->n_pmd_threads; i++) {
 								        struct pmd_thread *f = &dp->pmd_threads[i];
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
 								        xpthread_join(f->thread, NULL);
 								    }
 								    latch_poll(&dp->exit_latch);
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    free(dp->pmd_threads);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
 								    /* Start new threads. */
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
 								    dp->n_pmd_threads = n;
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    for (i = 0; i < n; i++) {
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        struct pmd_thread *f = &dp->pmd_threads[i];
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
 								        f->dp = dp;
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								        f->id = i;
 								        atomic_store(&f->change_seq, 1);
 								        /* Each thread will distribute all devices rx-queues among
 								         * themselves. */
 								        xpthread_create(&f->thread, NULL, pmd_thread_main, f);
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
+								    }
 								}
-												dpif-netdev: Add poll-mode-device thread.

This patch adds PMD type netdev for netdevice with poll-mode
drivers.  Since there is no way to get signal on a packet recv
from these devices we need to poll them in busy loop.  So minimize
system call overhead this patch uses dpif-thread exclusively
for PMD devices and rest of devices which needs system calls to
do IO are moved to dpif-netdev-run().
PMD device like DPDK work in userspace so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
-												dpif-netdev: Use separate threads for forwarding.

For now, we use exactly two threads.  Presumably at some point we will want
to make this configurable.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 17:00:30 -08:00
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								static void *
 								dp_netdev_flow_stats_new_cb(void)
 								{
 								    /* Builds one flow-statistics bucket and returns it to the caller.
 								     * dp_netdev_flow_used() passes this function to
 								     * ovsthread_stats_bucket_get(), apparently as the callback that
 								     * creates a bucket when one is needed.
 								     *
 								     * NOTE(review): going by the allocator's name, the memory is
 								     * presumably zero-filled and cache-line aligned -- confirm against
 								     * xzalloc_cacheline()'s definition. */
 								    struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
 								    /* The mutex has to be initialized before the bucket is handed out:
 								     * dp_netdev_flow_used() locks it around every counter update. */
 								    ovs_mutex_init(&bucket->mutex);
 								    return bucket;
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
-												dpif-netdev: Use packet key to parse TCP flags.

The flow that created the netdev_flow might have wildcarded TCP flags,
or it may not be a TCP flow at all.  Fix this by using the freshly
extracted flow key to parse TCP flags.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-19 16:13:32 -07:00
+								                    const struct ofpbuf *packet,
 								                    const struct flow *key)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												lib: Use tcp_flags from flow.

TCP flags are already extracted from the flow, no need to parse them
again.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-19 16:13:32 -07:00
+								    uint16_t tcp_flags = ntohs(key->tcp_flags);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
+								    long long int now = time_msec();
 								    struct dp_netdev_flow_stats *bucket;
 								    bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
 								                                        dp_netdev_flow_stats_new_cb);
 								    ovs_mutex_lock(&bucket->mutex);
 								    bucket->used = MAX(now, bucket->used);
 								    bucket->packet_count++;
 								    bucket->byte_count += packet->size;
 								    bucket->tcp_flags |= tcp_flags;
 								    ovs_mutex_unlock(&bucket->mutex);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								static void *
 								dp_netdev_stats_new_cb(void)
 								{
 								    /* Builds one datapath-statistics bucket and returns it to the caller.
 								     * dp_netdev_count_packet() passes this function to
 								     * ovsthread_stats_bucket_get(), apparently as the callback that
 								     * creates a bucket when one is needed.
 								     *
 								     * NOTE(review): going by the allocator's name, the memory is
 								     * presumably zero-filled and cache-line aligned -- confirm against
 								     * xzalloc_cacheline()'s definition. */
 								    struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
 								    /* The mutex has to be initialized before the bucket is handed out:
 								     * dp_netdev_count_packet() locks it around every increment. */
 								    ovs_mutex_init(&bucket->mutex);
 								    return bucket;
 								}
 								static void
 								dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
 								{
 								    /* Adds one to the 'type' counter in the statistics of 'dp'.
 								     *
 								     * 'dp' supplies the stats container ('dp->stats'); 'type' is used as
 								     * the index into the bucket's 'n' counter array.
 								     * dp_netdev_port_input() calls this with DP_STAT_HIT after executing
 								     * the actions of a matched flow. */
 								    struct dp_netdev_stats *bucket;
 								    /* Fetch the bucket to update.  The callback is presumably invoked to
 								     * create the bucket if none exists yet -- confirm against
 								     * ovsthread_stats_bucket_get(). */
 								    bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
 								    /* The increment is done with the bucket's own mutex held. */
 								    ovs_mutex_lock(&bucket->mutex);
 								    bucket->n[type]++;
 								    ovs_mutex_unlock(&bucket->mutex);
 								}
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								static void
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
 								                     struct pkt_metadata *md)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    struct dp_netdev_flow *netdev_flow;
-												flow: Separate "flow_t" from "struct odp_flow_key".

The "struct odp_flow_key" used in the kernel datapath is conceptually
separate from the "flow_t" used in userspace, but until now we have
used the latter as a typedef for the former for convenience.  This commit
separates them.  This makes it possible in upcoming commits to change
them independently.

This is cross-ported from the "wdp" branch, which has had it for months.

											
										
										
											2010-10-11 13:31:35 -07:00
+								    struct flow key;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												dpif-netdev: Tolerate undersized packets.

Actions that modify packets need to tolerate packets that are too small.
Most of the actions already implicitly do this check, since they check for
appropriate values in the flow key that would only be there if the
corresponding data was present.  But actions to modify the Ethernet header
didn't have a guarantee that the packet was at least 14 bytes long, and
actions to modify the VLAN didn't have such a guarantee either, so this
adds appropriate checks.

Problem found by code inspection.

											
										
										
											2010-08-10 11:38:55 -07:00
+								    if (packet->size < ETH_HEADER_LEN) {
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets but current netdev API does
not allow that.  Following patch allows dpif-netdev receive batch
of packet in a rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								        ofpbuf_delete(packet);
-												dpif-netdev: Tolerate undersized packets.

Actions that modify packets need to tolerate packets that are too small.
Most of the actions already implicitly do this check, since they check for
appropriate values in the flow key that would only be there if the
corresponding data was present.  But actions to modify the Ethernet header
didn't have a guarantee that the packet was at least 14 bytes long, and
actions to modify the VLAN didn't have such a guarantee either, so this
adds appropriate checks.

Problem found by code inspection.

											
										
										
											2010-08-10 11:38:55 -07:00
+								        return;
 								    }
-												lib: simplify flow_extract() API

Change the flow_extract() API to accept struct pkt_metadata,
instead of individual metadata fields. It will make the API more
logical and easier to maintain when we need to expand metadata
down the road.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-02-26 18:08:04 -08:00
+								    flow_extract(packet, md, &key);
-												dpif-netdev: Change a variable name.

'struct dp_netdev_flow' is currently being instantiated as 'flow'.
An upcoming commit introduces a classifier to dpif-netdev
which uses 'struct flow' at a few places and that can cause
confusion while reading code.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-29 02:34:15 -07:00
+								    netdev_flow = dp_netdev_lookup_flow(dp, &key);
 								    if (netdev_flow) {
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								        struct dp_netdev_actions *actions;
-												dpif-netdev: Use packet key to parse TCP flags.

The flow that created the netdev_flow might have wildcarded TCP flags,
or it may not be a TCP flow at all.  Fix this by using the freshly
extracted flow key to parse TCP flags.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-19 16:13:32 -07:00
+								        dp_netdev_flow_used(netdev_flow, packet, &key);
-												dpif-netdev: Use ovsthread_stats for flow stats.

This should scale better than a single mutex, though still not
ideally.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-22 16:03:10 -08:00
-												dpif-netdev: Use RCU to protect data.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-05 22:41:30 -08:00
+								        actions = dp_netdev_flow_get_actions(netdev_flow);
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets at once, but the current netdev API
does not allow that.  The following patch allows dpif-netdev to receive
a batch of packets in one rx_recv() call for any netdev port.  This will
be used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								        dp_netdev_execute_actions(dp, &key, packet, true, md,
-												dpif-netdev: Break actions out into new struct dp_netdev_actions.

This is analogous to the split between rule and rule_actions in
ofproto.  As there, it will allow retaining a reference to a rule's
actions, while processing them, without having to retain a reference
to the rule itself.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 14:37:13 -08:00
+								                                  actions->actions, actions->size);
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								        dp_netdev_count_packet(dp, DP_STAT_HIT);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    } else if (dp->handler_queues) {
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								        dp_netdev_count_packet(dp, DP_STAT_MISS);
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        dp_netdev_output_userspace(dp, packet,
 								                                   flow_hash_5tuple(&key, 0) % dp->n_handlers,
 								                                   DPIF_UC_MISS, &key, NULL);
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets at once, but the current netdev API
does not allow that.  The following patch allows dpif-netdev to receive
a batch of packets in one rx_recv() call for any netdev port.  This will
be used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								        ofpbuf_delete(packet);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    }
 								}
 								static int
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								                           int queue_no, int type, const struct flow *flow,
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								                           const struct nlattr *userdata)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    struct dp_netdev_queue *q;
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								    int error;
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    fat_rwlock_rdlock(&dp->queue_rwlock);
 								    q = &dp->handler_queues[queue_no];
 								    ovs_mutex_lock(&q->mutex);
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								    if (q->head - q->tail < MAX_QUEUE_LEN) {
 								        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
 								        struct dpif_upcall *upcall = &u->upcall;
 								        struct ofpbuf *buf = &u->buf;
 								        size_t buf_size;
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        upcall->type = type;
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
 								        /* Allocate buffer big enough for everything. */
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								        buf_size = ODPUTIL_FLOW_KEY_BYTES;
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								        if (userdata) {
 								            buf_size += NLA_ALIGN(userdata->nla_len);
 								        }
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets at once, but the current netdev API
does not allow that.  The following patch allows dpif-netdev to receive
a batch of packets in one rx_recv() call for any netdev port.  This will
be used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								        buf_size += packet->size;
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								        ofpbuf_init(buf, buf_size);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								        /* Put ODP flow. */
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these types
are marked with "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								        odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								        upcall->key = buf->data;
 								        upcall->key_len = buf->size;
-												dpif-netdev: Eliminate two malloc() calls per packet sent to "userspace".

This is easy enough, so it seems worthwhile now that FreeBSD is starting
to make more use of the "userspace switch".

CC: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-15 16:05:31 -07:00
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								        /* Put userdata. */
 								        if (userdata) {
 								            upcall->userdata = ofpbuf_put(buf, userdata,
 								                                          NLA_ALIGN(userdata->nla_len));
 								        }
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets at once, but the current netdev API
does not allow that.  The following patch allows dpif-netdev to receive
a batch of packets in one rx_recv() call for any netdev port.  This will
be used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								        upcall->packet.data = ofpbuf_put(buf, packet->data, packet->size);
 								        upcall->packet.size = packet->size;
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        seq_change(q->seq);
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								        error = 0;
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								    } else {
-												ovs-thread: Replace ovsthread_counter by more general ovsthread_stats.

This allows clients to do more than just increment a counter.  The
following commit will make the first use of that feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-19 07:47:12 -07:00
+								        dp_netdev_count_packet(dp, DP_STAT_LOST);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
+								        error = ENOBUFS;
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								    }
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both misses will be put to the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate is most treasured and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus, the current implementation will be put into testing, and
investigation into a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								    ovs_mutex_unlock(&q->mutex);
 								    fat_rwlock_unlock(&dp->queue_rwlock);
-												dpif-netdev: Introduce new mutex to protect queues.

This is a first step in making thread safety more granular in dpif-netdev,
to allow for multithreaded forwarding.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-27 09:42:51 -08:00
 								    return error;
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								struct dp_netdev_execute_aux {
 								    /* Datapath for the executing actions; dp_execute_cb() passes it to
 								     * dp_netdev_lookup_port() and dp_netdev_output_userspace(). */
 								    struct dp_netdev *dp;
 								    /* Flow key given to dp_netdev_execute_actions(); dp_execute_cb()
 								     * hashes it to pick a handler index and forwards it with the
 								     * userspace upcall. */
 								    const struct flow *key;
 								};
 								static void
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								dp_execute_cb(void *aux_, struct ofpbuf *packet,
 								              const struct pkt_metadata *md OVS_UNUSED,
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								              const struct nlattr *a, bool may_steal)
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								{
 								    struct dp_netdev_execute_aux *aux = aux_;
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    int type = nl_attr_type(a);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    struct dp_netdev_port *p;
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    switch ((enum ovs_action_attr)type) {
 								    case OVS_ACTION_ATTR_OUTPUT:
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
 								        if (p) {
-												netdev: Send ofpbuf directly to netdev.

DPDK netdev needs to access the ofpbuf while sending a buffer.  The following
patch changes netdev_send accordingly.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:56:51 -07:00
+								            netdev_send(p->netdev, packet, may_steal);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        }
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								        break;
 								    case OVS_ACTION_ATTR_USERSPACE: {
 								        const struct nlattr *userdata;
-												odp-execute: Refine signatures for odp_execute_actions() callbacks.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-20 12:47:33 -07:00
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								        userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
-												dpif-netdev: Implement the API functions to allow multiple handler
threads read upcall.

This commit implements the API functions to allow multiple handler
threads to read upcalls.

Also, this commit removes the handling priority of DPIF_UC_MISS
over DPIF_UC_ACTION.  So, both kinds of upcalls will be put in the same
queue.  The decision is based on the fact that a lot has changed
since the age when flow setup rate was most treasured, and starving
all actions in the presence of any flow misses doesn't seem like
a sound balancing solution.

Thus the current implementation will be put into testing, and
investigation of a better balancing solution will continue if
there is an issue.

Also note, the introduction and use of flow_hash_5tuple() will
put missed ICMP packets from same source but with different
type/code to different handler queues.  This may cause reordering
of these packets.  For now, we do not count this as a problem.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:07:38 -08:00
+								        dp_netdev_output_userspace(aux->dp, packet,
 								                                   flow_hash_5tuple(aux->key, 0)
 								                                       % aux->dp->n_handlers,
 								                                   DPIF_UC_ACTION, aux->key,
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								                                   userdata);
-												netdev: Send ofpbuf directly to netdev.

DPDK netdev needs to access the ofpbuf while sending a buffer.  The following
patch changes netdev_send accordingly.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:56:51 -07:00
 								        if (may_steal) {
 								            ofpbuf_delete(packet);
 								        }
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								        break;
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								    }
-												odp-execute: Consolidate callbacks.

Use one callback instead of many, helps in adding new functionality
later on.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								    case OVS_ACTION_ATTR_PUSH_VLAN:
 								    case OVS_ACTION_ATTR_POP_VLAN:
 								    case OVS_ACTION_ATTR_PUSH_MPLS:
 								    case OVS_ACTION_ATTR_POP_MPLS:
 								    case OVS_ACTION_ATTR_SET:
 								    case OVS_ACTION_ATTR_SAMPLE:
 								    case OVS_ACTION_ATTR_UNSPEC:
 								    case __OVS_ACTION_ATTR_MAX:
 								        OVS_NOT_REACHED();
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								    }
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets, but the current netdev API does
not allow that.  The following patch allows dpif-netdev to receive a batch
of packets in a rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
-												datapath: Move Netlink PID for userspace actions from flows to actions.

Commit b063d9f06 "datapath: Use unicast Netlink sockets for upcalls" that
switched from multicast to unicast Netlink for sending upcalls added a
Netlink PID to each kernel flow, used by OVS_ACTION_ATTR_USERSPACE actions
within the flow as target.

This commit drops this per-flow PID in favor of a per-action PID, because
that is more flexible.  It does not yet make use of this additional
flexibility, so behavior should not change.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Bug #7559.

											
										
										
											2011-10-12 16:24:54 -07:00
+								}
-												datapath: Refactor actions in terms of match fields.

Almost all current actions can be expressed in the form of
push/pop/set <field>, where field is one of the match fields. We can
create three base actions and take a field. This has both a nice
symmetry and avoids inconsistencies where we can match on the vlan
TPID but not set it.
Following patch converts all actions to this new format.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

Bug #7115

											
										
										
											2011-10-21 14:38:54 -07:00
+								static void
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key,
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets, but the current netdev API does
not allow that.  The following patch allows dpif-netdev to receive a batch
of packets in a rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								                          struct ofpbuf *packet, bool may_steal,
 								                          struct pkt_metadata *md,
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								                          const struct nlattr *actions, size_t actions_len)
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								{
-												dpif-netdev: Maintain the original key during execution.

Userspace action needs the original flow key.  This also
matches the kernel datapath behavior.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-14 14:35:58 -08:00
+								    struct dp_netdev_execute_aux aux = {dp, key};
-												netdev: Extend rx_recv to pass multiple packets.

DPDK can receive multiple packets, but the current netdev API does
not allow that.  The following patch allows dpif-netdev to receive a batch
of packets in a rx_recv() call for any netdev port.  This will be
used by dpdk-netdev.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-03-20 10:54:37 -07:00
+								    odp_execute_actions(&aux, packet, may_steal, md,
 								                        actions, actions_len, dp_execute_cb);
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								}
 								const struct dpif_class dpif_netdev_class = {
 								    "netdev",
-												dpif-netdev: allow for proper destruction of netdev datapaths

Until now, bridges with datapath_type=netdev did not destroy the datapath
when deleted. In particular, the tap device implementing the internal
interface was not close()d, and therefore the tap persists until
ovs-vswitchd exit()s.

This behaviour was caused by the missing callback for 'enumerate' in the
dpif-netdev class. Without this callback 'bridge_reconfigure' failed to
realize that there are datapaths with no bridge, and thus cannot destroy
them. Providing an 'enumerate' callback fixes this.

Signed-off-by: Giuseppe Lettieri <g.lettieri@iet.unipi.it>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-09 12:17:15 +02:00
+								    dpif_netdev_enumerate,
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should be opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								    dpif_netdev_port_open_type,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_open,
 								    dpif_netdev_close,
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								    dpif_netdev_destroy,
-												dpif-netdev: Add poll-mode-device thread.

This patch adds a PMD type of netdev for network devices with poll-mode
drivers.  Since there is no way to get a signal on packet receive
from these devices, we need to poll them in a busy loop.  To minimize
system call overhead, this patch uses dpif-thread exclusively
for PMD devices, and the rest of the devices, which need system calls to
do I/O, are moved to dpif_netdev_run().
PMD devices like DPDK work in userspace, so there is no system call
overhead for them.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>

											
										
										
											2014-03-20 10:57:41 -07:00
+								    dpif_netdev_run,
 								    dpif_netdev_wait,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_get_stats,
 								    dpif_netdev_port_add,
 								    dpif_netdev_port_del,
 								    dpif_netdev_port_query_by_number,
 								    dpif_netdev_port_query_by_name,
-												datapath: Move Netlink PID for userspace actions from flows to actions.

Commit b063d9f06 "datapath: Use unicast Netlink sockets for upcalls" that
switched from multicast to unicast Netlink for sending upcalls added a
Netlink PID to each kernel flow, used by OVS_ACTION_ATTR_USERSPACE actions
within the flow as target.

This commit drops this per-flow PID in favor of a per-action PID, because
that is more flexible.  It does not yet make use of this additional
flexibility, so behavior should not change.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Bug #7559.

											
										
										
											2011-10-12 16:24:54 -07:00
+								    NULL,                       /* port_get_pid */
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    dpif_netdev_port_dump_start,
 								    dpif_netdev_port_dump_next,
 								    dpif_netdev_port_dump_done,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_port_poll,
 								    dpif_netdev_port_poll_wait,
 								    dpif_netdev_flow_get,
 								    dpif_netdev_flow_put,
 								    dpif_netdev_flow_del,
 								    dpif_netdev_flow_flush,
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								    dpif_netdev_flow_dump_state_init,
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    dpif_netdev_flow_dump_start,
 								    dpif_netdev_flow_dump_next,
-												dpif: New function flow_dump_next_may_destroy_keys().

This new function allows callers to determine whether previously
returned keys will be modified or reallocated on the next call to
dpif_flow_dump_next(). This will be used in a future commit to allow
batched flow deletion by revalidator threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:10 -08:00
+								    NULL,
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    dpif_netdev_flow_dump_done,
-												dpif: Separate local and shared flow dump state.

This patch separates the structures for thread-local flow dump state
("state") from the shared flow dump state ("iter") in dpif-linux and
dpif-netdev. Future patches will make use of this to allow multiple
threads to dump flows from the same flow dump operation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:07 -08:00
+								    dpif_netdev_flow_dump_state_uninit,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_execute,
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								    NULL,                       /* operate */
-												dpif: Simplify the "listen mask" concept.

At one point in the past, there were three separate queues between the
kernel module and OVS userspace, each of which corresponded to a Netlink
socket (or, before that, to a character device).  It made sense to allow
each of these to be enabled or disabled separately, hence the "listen mask"
concept in the dpif layer.

These days, the concept is much less clear-cut.  Queuing is no longer on
the basis of different classes of packets but instead striped across a
collection of sockets based on input port.  It doesn't really make sense
to enable receiving packets on the basis of the kind of packet anymore.
Accordingly, this commit simplifies the "listen_mask" to just a bool that
either enables or disables receiving packets.

It could be useful to enable or disable receiving packets on a per-vport
basis, but the rest of the code isn't ready to make use of that so this
commit doesn't generalize this much.

Based on this discussion on ovs-dev:
http://openvswitch.org/pipermail/dev/2011-October/012044.html

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 17:09:22 -08:00
+								    dpif_netdev_recv_set,
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								    dpif_netdev_handlers_set,
-												dpif-netdev: Allow enqueue actions.

The dpif-netdev implementation disallowed enqueue actions because
it did not support conversion from OVS 'queue_id' to dpif
'priority'.  For testing purposes, this patch allows queues which
translate into NOOPs.

											
										
										
											2011-11-21 13:36:17 -08:00
+								    dpif_netdev_queue_to_priority,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								    dpif_netdev_recv,
 								    dpif_netdev_recv_wait,
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    dpif_netdev_recv_purge,
-												New implementation of userspace datapath, based on the netdev library.

											
										
										
											2009-06-19 14:09:39 -07:00
+								};
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								static void
 								dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                              const char *argv[], void *aux OVS_UNUSED)
 								{
 								    struct dp_netdev_port *port;
 								    struct dp_netdev *dp;
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    odp_port_t port_no;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_mutex_lock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    dp = shash_find_data(&dp_netdevs, argv[1]);
 								    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        ovs_mutex_unlock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
 								        return;
 								    }
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_refcount_ref(&dp->ref_cnt);
 								    ovs_mutex_unlock(&dp_netdev_mutex);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								    ovs_rwlock_wrlock(&dp->port_rwlock);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    if (get_port_by_name(dp, argv[2], &port)) {
 								        unixctl_command_reply_error(conn, "unknown port");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    port_no = u32_to_odp(atoi(argv[3]));
 								    if (!port_no || port_no == ODPP_NONE) {
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "bad port number");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    if (dp_netdev_lookup_port(dp, port_no)) {
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								        unixctl_command_reply_error(conn, "port number already in use");
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
+								        goto exit;
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    }
-												dpif-netdev: Use hmap instead of list+array for tracking ports.

The goal is to make it easy to divide the ports into groups for handling
by threads.  It seems easy enough to do that by hash value, and a little
harder otherwise.

This commit has the side effect of raising the maximum number of ports from
256 to UINT32_MAX-1.  That is why some tests need to be updated:
previously, internally generated port names like "ovs_vxlan_4341" were
ignored because 4341 is bigger than the previous limit of 256.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-24 16:08:57 -08:00
+								    hmap_remove(&dp->ports, &port->node);
 								    port->port_no = port_no;
 								    hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
-												dpif-netdev: Avoid races on queue and port changes using seq objects.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-08-07 13:29:54 -07:00
+								    seq_change(dp->port_seq);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								    unixctl_command_reply(conn, NULL);
-												dpif-netdev: Make thread-safety much more granular.

This will allow for parallelism in multithreaded forwarding in an upcoming
commit.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-01-08 15:58:11 -08:00
 								exit:
 								    ovs_rwlock_unlock(&dp->port_rwlock);
 								    dp_netdev_unref(dp);
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
+								}
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								static void
 								dpif_dummy_register__(const char *type)
 								{
 								    struct dpif_class *class;
 								    class = xmalloc(sizeof *class);
 								    *class = dpif_netdev_class;
 								    class->type = xstrdup(type);
 								    dp_register_provider(class);
 								}
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								void
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								dpif_dummy_register(bool override)
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								{
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
+								    if (override) {
 								        struct sset types;
 								        const char *type;
 								        sset_init(&types);
 								        dp_enumerate_types(&types);
 								        SSET_FOR_EACH (type, &types) {
 								            if (!dp_unregister_provider(type)) {
 								                dpif_dummy_register__(type);
 								            }
 								        }
 								        sset_destroy(&types);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								    }
-												dummy: Make --enable-dummy=override replace all dpifs, netdevs by dummies.

Plain "--enable-dummy" just creates new dummy dpif and netdev classes.
This commit makes "--enable-dummy=override" go a step farther and actually
delete and replace all the existing dpif and netdev classes by copies of
the dummy class.

This is useful for testing in an environment where changing the classes in
Bridge or Interface records is challenging.

Requested-by: Andrew Lambeth <wal@nicira.com>
Tested-by: Andrew Lambeth <wal@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-19 10:24:46 -08:00
 								    dpif_dummy_register__("dummy");
-												ofproto-dpif: Tolerate spontaneous changes in datapath port numbers.

This can happen on ESX.

Also adds a test to make sure this works.

Bug #17634.
Signed-off-by: Ben Pfaff <blp@nicira.com>
Tested-by: Guolin Yang <gyang@vmware.com>

											
										
										
											2013-07-29 15:11:49 -07:00
 								    unixctl_command_register("dpif-dummy/change-port-number",
 								                             "DP PORT NEW-NUMBER",
 , 3, dpif_dummy_change_port_number, NULL);
-												Add new "dummy" netdev and dpif implementations for use in unit tests.

											
										
										
											2010-11-29 12:21:08 -08:00
+								}