/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#include <config.h>
#include "ofproto-dpif-upcall.h"

#include <errno.h>
#include <stdbool.h>
#include <inttypes.h>

#include "connmgr.h"
#include "coverage.h"
#include "dynamic-string.h"
#include "dpif.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "latch.h"
#include "seq.h"
#include "list.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-sflow.h"
#include "packets.h"
#include "poll-loop.h"
#include "vlog.h"
#define MAX_QUEUE_LENGTH 512

VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);

COVERAGE_DEFINE(drop_queue_overflow);
COVERAGE_DEFINE(upcall_queue_overflow);
COVERAGE_DEFINE(fmb_queue_overflow);
COVERAGE_DEFINE(fmb_queue_revalidated);
/* A thread that processes each upcall handed to it by the dispatcher thread,
 * forwards the upcall's packet, and then queues it to the main ofproto_dpif
 * to possibly set up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */

    struct ovs_mutex mutex;            /* Mutex guarding the following. */

    /* Queue of unprocessed upcalls, guarded by 'mutex'. */
    struct list upcalls OVS_GUARDED;
    size_t n_upcalls OVS_GUARDED;

    bool need_signal;                  /* Only changed by the dispatcher. */

    pthread_cond_t wake_cond;          /* Wakes 'thread' while holding
                                          'mutex'. */
};
/* An upcall handler for ofproto_dpif.
 *
 * udpif is implemented as a "dispatcher" thread that reads upcalls from the
 * kernel.  It processes each upcall just enough to figure out its next
 * destination.  For a "miss" upcall (MISS_UPCALL), this is one of several
 * "handler" threads (see struct handler).  Other upcalls are queued to the
 * main ofproto_dpif. */
struct udpif {
    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    uint32_t secret;                   /* Random seed for upcall hash. */

    pthread_t dispatcher;              /* Dispatcher thread ID. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    /* Queues to pass up to ofproto-dpif. */
    struct guarded_list drop_keys;     /* "struct drop_key"s. */
    struct guarded_list fmbs;          /* "struct flow_miss_batch"es. */

    struct seq *wait_seq;
    struct seq *reval_seq;

    struct latch exit_latch;           /* Tells child threads to exit. */
};
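
/* For orientation, a sketch (added commentary, not from the original source)
 * of how data moves through the structures above:
 *
 *                       kernel datapath
 *                             |
 *                             |  dpif_recv()
 *                             v
 *                    dispatcher thread ('dispatcher')
 *                             |
 *                             |  hash of flow key
 *                             v
 *              handlers[i].upcalls ('handler' threads)
 *                             |
 *                             |  handle_upcalls()
 *                             v
 *                udpif->fmbs / udpif->drop_keys
 *                             |
 *                             |  flow_miss_batch_next() / drop_key_next()
 *                             v
 *                       main ofproto-dpif
 */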

enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss. */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL                /* Per-bridge sampling. */
};

struct upcall {
    struct list list_node;          /* For queuing upcalls. */
    struct flow_miss *flow_miss;    /* This upcall's flow_miss. */

    /* Raw upcall plus data for keeping track of the memory backing it. */
    struct dpif_upcall dpif_upcall; /* As returned by dpif_recv(). */
    struct ofpbuf upcall_buf;       /* Owns some data in 'dpif_upcall'. */
    uint64_t upcall_stub[512 / 8];  /* Buffer to reduce need for malloc(). */
};

static void upcall_destroy(struct upcall *);

static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

static void recv_upcalls(struct udpif *);
static void handle_upcalls(struct udpif *, struct list *upcalls);
static void miss_destroy(struct flow_miss *);
static void *udpif_dispatcher(void *);
static void *udpif_upcall_handler(void *);

struct udpif *
udpif_create(struct dpif_backer *backer, struct dpif *dpif)
{
    struct udpif *udpif = xzalloc(sizeof *udpif);

    udpif->dpif = dpif;
    udpif->backer = backer;
    udpif->secret = random_uint32();
    udpif->wait_seq = seq_create();
    udpif->reval_seq = seq_create();
    latch_init(&udpif->exit_latch);
    guarded_list_init(&udpif->drop_keys);
    guarded_list_init(&udpif->fmbs);

    return udpif;
}
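
/* Illustrative lifecycle (a sketch, not from the original source; assumes
 * 'backer' holds an open 'dpif' handle):
 *
 *     struct udpif *udpif = udpif_create(backer, dpif);
 *     udpif_recv_set(udpif, n_handler_threads, true);  /+ start threads +/
 *     ...                                              /+ datapath running +/
 *     udpif_recv_set(udpif, 0, false);                 /+ stop threads +/
 *     udpif_destroy(udpif);
 *
 * ('/+ +/' stands in for nested comment markers.) */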

void
udpif_destroy(struct udpif *udpif)
{
    struct flow_miss_batch *fmb;
    struct drop_key *drop_key;

    udpif_recv_set(udpif, 0, false);

    while ((drop_key = drop_key_next(udpif))) {
        drop_key_destroy(drop_key);
    }

    while ((fmb = flow_miss_batch_next(udpif))) {
        flow_miss_batch_destroy(fmb);
    }

    guarded_list_destroy(&udpif->drop_keys);
    guarded_list_destroy(&udpif->fmbs);
    latch_destroy(&udpif->exit_latch);
    seq_destroy(udpif->wait_seq);
    seq_destroy(udpif->reval_seq);
    free(udpif);
}
/* Tells 'udpif' to begin or stop handling flow misses depending on the value
 * of 'enable'.  'n_handlers' is the number of upcall_handler threads to
 * create.  Passing 'n_handlers' as zero is equivalent to passing 'enable' as
 * false. */
void
udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable)
{
    n_handlers = enable ? n_handlers : 0;

    /* Stop the old threads (if any). */
    if (udpif->handlers && udpif->n_handlers != n_handlers) {
        size_t i;

        latch_set(&udpif->exit_latch);

        /* Wake the handlers so they can exit. */
        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            ovs_mutex_lock(&handler->mutex);
            xpthread_cond_signal(&handler->wake_cond);
            ovs_mutex_unlock(&handler->mutex);
        }

        xpthread_join(udpif->dispatcher, NULL);
        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];
            struct upcall *miss, *next;

            xpthread_join(handler->thread, NULL);

            ovs_mutex_lock(&handler->mutex);
            LIST_FOR_EACH_SAFE (miss, next, list_node, &handler->upcalls) {
                list_remove(&miss->list_node);
                upcall_destroy(miss);
            }
            ovs_mutex_unlock(&handler->mutex);
            ovs_mutex_destroy(&handler->mutex);

            xpthread_cond_destroy(&handler->wake_cond);
        }
        latch_poll(&udpif->exit_latch);

        free(udpif->handlers);
        udpif->handlers = NULL;
        udpif->n_handlers = 0;
    }

    /* Start new threads (if necessary). */
    if (!udpif->handlers && n_handlers) {
        size_t i;

        udpif->n_handlers = n_handlers;
        udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
        for (i = 0; i < udpif->n_handlers; i++) {
            struct handler *handler = &udpif->handlers[i];

            handler->udpif = udpif;
            list_init(&handler->upcalls);
            handler->need_signal = false;
            xpthread_cond_init(&handler->wake_cond, NULL);
            ovs_mutex_init(&handler->mutex);
            xpthread_create(&handler->thread, NULL, udpif_upcall_handler,
                            handler);
        }
        xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif);
    }
}
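
/* Design note (added commentary): on shutdown the dispatcher is joined before
 * the handlers, so no new upcalls can be queued while each handler's leftover
 * 'upcalls' list is drained and destroyed. */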

void
udpif_wait(struct udpif *udpif)
{
    uint64_t seq = seq_read(udpif->wait_seq);
    if (!guarded_list_is_empty(&udpif->drop_keys) ||
        !guarded_list_is_empty(&udpif->fmbs)) {
        poll_immediate_wake();
    } else {
        seq_wait(udpif->wait_seq, seq);
    }
}
/* Notifies 'udpif' that something changed which may render previous
 * xlate_actions() results invalid. */
void
udpif_revalidate(struct udpif *udpif)
{
    struct flow_miss_batch *fmb, *next_fmb;
    struct list fmbs;

    /* Since we remove each miss on revalidation, their statistics won't be
     * accounted to the appropriate 'facet's in the upper layer.  In most
     * cases, this is alright because we've already pushed the stats to the
     * relevant rules. */
    seq_change(udpif->reval_seq);

    guarded_list_pop_all(&udpif->fmbs, &fmbs);
    LIST_FOR_EACH_SAFE (fmb, next_fmb, list_node, &fmbs) {
        list_remove(&fmb->list_node);
        flow_miss_batch_destroy(fmb);
    }

    udpif_drop_key_clear(udpif);
}
void
udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
{
    size_t i;

    simap_increase(usage, "dispatchers", 1);
    simap_increase(usage, "flow_dumpers", 1);

    simap_increase(usage, "handlers", udpif->n_handlers);
    for (i = 0; i < udpif->n_handlers; i++) {
        struct handler *handler = &udpif->handlers[i];
        ovs_mutex_lock(&handler->mutex);
        simap_increase(usage, "handler upcalls", handler->n_upcalls);
        ovs_mutex_unlock(&handler->mutex);
    }
}
/* Destroys and deallocates 'upcall'. */
static void
upcall_destroy(struct upcall *upcall)
{
    if (upcall) {
        ofpbuf_uninit(&upcall->upcall_buf);
        free(upcall);
    }
}
/* Retrieves the next batch of processed flow misses for 'udpif' to install.
 * The caller is responsible for destroying it with flow_miss_batch_destroy().
 */
struct flow_miss_batch *
flow_miss_batch_next(struct udpif *udpif)
{
    int i;

    for (i = 0; i < 50; i++) {
        struct flow_miss_batch *next;
        struct list *next_node;

        next_node = guarded_list_pop_front(&udpif->fmbs);
        if (!next_node) {
            break;
        }

        next = CONTAINER_OF(next_node, struct flow_miss_batch, list_node);
        if (next->reval_seq == seq_read(udpif->reval_seq)) {
            return next;
        }

        flow_miss_batch_destroy(next);
    }

    return NULL;
}
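
/* Design note (added commentary): the loop above gives up after discarding 50
 * stale batches, so a burst of revalidations cannot make a single call spin
 * indefinitely destroying out-of-date work. */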

/* Destroys and deallocates 'fmb'. */
void
flow_miss_batch_destroy(struct flow_miss_batch *fmb)
{
    struct flow_miss *miss, *next;
    struct upcall *upcall, *next_upcall;

    if (!fmb) {
        return;
    }

    HMAP_FOR_EACH_SAFE (miss, next, hmap_node, &fmb->misses) {
        hmap_remove(&fmb->misses, &miss->hmap_node);
        miss_destroy(miss);
    }

    LIST_FOR_EACH_SAFE (upcall, next_upcall, list_node, &fmb->upcalls) {
        list_remove(&upcall->list_node);
        upcall_destroy(upcall);
    }

    hmap_destroy(&fmb->misses);
    free(fmb);
}
/* Retrieves the next drop key which ofproto-dpif needs to process.  The
 * caller is responsible for destroying it with drop_key_destroy(). */
struct drop_key *
drop_key_next(struct udpif *udpif)
{
    struct list *next = guarded_list_pop_front(&udpif->drop_keys);
    return next ? CONTAINER_OF(next, struct drop_key, list_node) : NULL;
}
/* Destroys and deallocates 'drop_key'. */
void
drop_key_destroy(struct drop_key *drop_key)
{
    if (drop_key) {
        free(drop_key->key);
        free(drop_key);
    }
}
/* Clears all drop keys waiting to be processed by drop_key_next(). */
void
udpif_drop_key_clear(struct udpif *udpif)
{
    struct drop_key *drop_key, *next;
    struct list list;

    guarded_list_pop_all(&udpif->drop_keys, &list);
    LIST_FOR_EACH_SAFE (drop_key, next, list_node, &list) {
        list_remove(&drop_key->list_node);
        drop_key_destroy(drop_key);
    }
}
/* The dispatcher thread is responsible for receiving upcalls from the kernel
 * and assigning each one to an upcall_handler thread. */
static void *
udpif_dispatcher(void *arg)
{
    struct udpif *udpif = arg;

    set_subprogram_name("dispatcher");
    while (!latch_is_set(&udpif->exit_latch)) {
        recv_upcalls(udpif);
        dpif_recv_wait(udpif->dpif);
        latch_wait(&udpif->exit_latch);
        poll_block();
    }

    return NULL;
}
/* The miss handler thread is responsible for processing miss upcalls retrieved
 * by the dispatcher thread.  Once finished, it passes the processed miss
 * upcalls to ofproto-dpif, where they're installed in the datapath. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;

    set_subprogram_name("upcall_%u", ovsthread_id_self());
    for (;;) {
        struct list misses = LIST_INITIALIZER(&misses);
        size_t i;

        ovs_mutex_lock(&handler->mutex);

        if (latch_is_set(&handler->udpif->exit_latch)) {
            ovs_mutex_unlock(&handler->mutex);
            return NULL;
        }

        if (!handler->n_upcalls) {
            ovs_mutex_cond_wait(&handler->wake_cond, &handler->mutex);
        }

        for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) {
            if (handler->n_upcalls) {
                handler->n_upcalls--;
                list_push_back(&misses, list_pop_front(&handler->upcalls));
            } else {
                break;
            }
        }
        ovs_mutex_unlock(&handler->mutex);

        handle_upcalls(handler->udpif, &misses);

        coverage_clear();
    }
}
static void
miss_destroy(struct flow_miss *miss)
{
    xlate_out_uninit(&miss->xout);
}
static enum upcall_type
classify_upcall(const struct upcall *upcall)
{
    const struct dpif_upcall *dpif_upcall = &upcall->dpif_upcall;
    union user_action_cookie cookie;
    size_t userdata_len;

    /* First look at the upcall type. */
    switch (dpif_upcall->type) {
    case DPIF_UC_ACTION:
        break;

    case DPIF_UC_MISS:
        return MISS_UPCALL;

    case DPIF_N_UC_TYPES:
    default:
        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32,
                     dpif_upcall->type);
        return BAD_UPCALL;
    }

    /* "action" upcalls need a closer look. */
    if (!dpif_upcall->userdata) {
        VLOG_WARN_RL(&rl, "action upcall missing cookie");
        return BAD_UPCALL;
    }
    userdata_len = nl_attr_get_size(dpif_upcall->userdata);
    if (userdata_len < sizeof cookie.type
        || userdata_len > sizeof cookie) {
        VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
                     userdata_len);
        return BAD_UPCALL;
    }
    memset(&cookie, 0, sizeof cookie);
    memcpy(&cookie, nl_attr_get(dpif_upcall->userdata), userdata_len);
    if (userdata_len == sizeof cookie.sflow
        && cookie.type == USER_ACTION_COOKIE_SFLOW) {
        return SFLOW_UPCALL;
    } else if (userdata_len == sizeof cookie.slow_path
               && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
        return MISS_UPCALL;
    } else if (userdata_len == sizeof cookie.flow_sample
               && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
        return FLOW_SAMPLE_UPCALL;
    } else if (userdata_len == sizeof cookie.ipfix
               && cookie.type == USER_ACTION_COOKIE_IPFIX) {
        return IPFIX_UPCALL;
    } else {
        VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
                     " and size %"PRIuSIZE, cookie.type, userdata_len);
        return BAD_UPCALL;
    }
}
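
/* Example dispatch on the result (a sketch mirroring how handle_upcalls()
 * below consumes the classification; added commentary, not from the original
 * source):
 *
 *     switch (classify_upcall(upcall)) {
 *     case MISS_UPCALL:        ... batch for flow setup ...          break;
 *     case SFLOW_UPCALL:       ... dpif_sflow_received() ...         break;
 *     case FLOW_SAMPLE_UPCALL: ... dpif_ipfix_flow_sample() ...      break;
 *     case IPFIX_UPCALL:       ... dpif_ipfix_bridge_sample() ...    break;
 *     case BAD_UPCALL:         ... drop ...                          break;
 *     }
 */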

static void
recv_upcalls(struct udpif *udpif)
{
    int n;

    for (;;) {
        uint32_t hash = udpif->secret;
        struct handler *handler;
        struct upcall *upcall;
        size_t n_bytes, left;
        struct nlattr *nla;
        int error;

        upcall = xmalloc(sizeof *upcall);
        ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub,
                        sizeof upcall->upcall_stub);
        error = dpif_recv(udpif->dpif, &upcall->dpif_upcall,
                          &upcall->upcall_buf);
        if (error) {
            upcall_destroy(upcall);
            break;
        }

        n_bytes = 0;
        NL_ATTR_FOR_EACH (nla, left, upcall->dpif_upcall.key,
                          upcall->dpif_upcall.key_len) {
            enum ovs_key_attr type = nl_attr_type(nla);
            if (type == OVS_KEY_ATTR_IN_PORT
                || type == OVS_KEY_ATTR_TCP
                || type == OVS_KEY_ATTR_UDP) {
                if (nl_attr_get_size(nla) == 4) {
                    hash = mhash_add(hash, nl_attr_get_u32(nla));
                    n_bytes += 4;
                } else {
                    VLOG_WARN_RL(&rl,
                                 "Netlink attribute with incorrect size.");
                }
            }
        }
        hash = mhash_finish(hash, n_bytes);

        handler = &udpif->handlers[hash % udpif->n_handlers];

        ovs_mutex_lock(&handler->mutex);
        if (handler->n_upcalls < MAX_QUEUE_LENGTH) {
            list_push_back(&handler->upcalls, &upcall->list_node);
            if (handler->n_upcalls == 0) {
                handler->need_signal = true;
            }
            handler->n_upcalls++;
            if (handler->need_signal &&
                handler->n_upcalls >= FLOW_MISS_MAX_BATCH) {
                handler->need_signal = false;
                xpthread_cond_signal(&handler->wake_cond);
            }
            ovs_mutex_unlock(&handler->mutex);
            if (!VLOG_DROP_DBG(&rl)) {
                struct ds ds = DS_EMPTY_INITIALIZER;

                odp_flow_key_format(upcall->dpif_upcall.key,
                                    upcall->dpif_upcall.key_len,
                                    &ds);
                VLOG_DBG("dispatcher: enqueue (%s)", ds_cstr(&ds));
                ds_destroy(&ds);
            }
        } else {
            ovs_mutex_unlock(&handler->mutex);
            COVERAGE_INC(upcall_queue_overflow);
            upcall_destroy(upcall);
        }
    }

    for (n = 0; n < udpif->n_handlers; ++n) {
        struct handler *handler = &udpif->handlers[n];

        if (handler->need_signal) {
            handler->need_signal = false;
            ovs_mutex_lock(&handler->mutex);
            xpthread_cond_signal(&handler->wake_cond);
            ovs_mutex_unlock(&handler->mutex);
        }
    }
}
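
/* Design note (added commentary): only OVS_KEY_ATTR_IN_PORT and the TCP/UDP
 * port attributes feed the hash above, so every upcall for a given transport
 * flow reaches the same handler thread.  This preserves packet ordering
 * within a flow while still spreading unrelated flows across handlers. */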

static struct flow_miss *
flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto,
               const struct flow *flow, uint32_t hash)
{
    struct flow_miss *miss;

    HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) {
        if (miss->ofproto == ofproto && flow_equal(&miss->flow, flow)) {
            return miss;
        }
    }

    return NULL;
}
static void
handle_upcalls(struct udpif *udpif, struct list *upcalls)
{
    struct dpif_op *opsp[FLOW_MISS_MAX_BATCH];
    struct dpif_op ops[FLOW_MISS_MAX_BATCH];
    struct upcall *upcall, *next;
    struct flow_miss_batch *fmb;
    size_t n_misses, n_ops, i;
    struct flow_miss *miss;
    enum upcall_type type;
    bool fail_open;

    /* Extract the flow from each upcall.  Construct in fmb->misses a hash
     * table that maps each unique flow to a 'struct flow_miss'.
     *
     * Most commonly there is a single packet per flow_miss, but there are
     * several reasons why there might be more than one, e.g.:
     *
     *   - The dpif packet interface does not support TSO (or UFO, etc.), so a
     *     large packet sent to userspace is split into a sequence of smaller
     *     ones.
     *
     *   - A stream of quickly arriving packets in an established "slow-pathed"
     *     flow.
     *
     *   - Rarely, a stream of quickly arriving packets in a flow not yet
     *     established.  (This is rare because most protocols do not send
     *     multiple back-to-back packets before receiving a reply from the
     *     other end of the connection, which gives OVS a chance to set up a
     *     datapath flow.)
     */
    fmb = xmalloc(sizeof *fmb);
    fmb->reval_seq = seq_read(udpif->reval_seq);
    hmap_init(&fmb->misses);
    list_init(&fmb->upcalls);
    n_misses = 0;
    LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) {
        struct dpif_upcall *dupcall = &upcall->dpif_upcall;
        struct ofpbuf *packet = dupcall->packet;
        struct flow_miss *miss = &fmb->miss_buf[n_misses];
        struct flow_miss *existing_miss;
        struct ofproto_dpif *ofproto;
        struct dpif_sflow *sflow;
        struct dpif_ipfix *ipfix;
        odp_port_t odp_in_port;
        struct flow flow;
        int error;

        error = xlate_receive(udpif->backer, packet, dupcall->key,
                              dupcall->key_len, &flow, &miss->key_fitness,
                              &ofproto, &ipfix, &sflow, NULL, &odp_in_port);
        if (error) {
            if (error == ENODEV) {
                struct drop_key *drop_key;

                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently.  Install a drop flow
                 * so that future packets of the flow are inexpensively dropped
                 * in the kernel. */
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32, odp_in_port);

                drop_key = xmalloc(sizeof *drop_key);
                drop_key->key = xmemdup(dupcall->key, dupcall->key_len);
                drop_key->key_len = dupcall->key_len;

                if (guarded_list_push_back(&udpif->drop_keys,
                                           &drop_key->list_node,
                                           MAX_QUEUE_LENGTH)) {
                    seq_change(udpif->wait_seq);
                } else {
                    COVERAGE_INC(drop_queue_overflow);
                    drop_key_destroy(drop_key);
                }
            }
            list_remove(&upcall->list_node);
            upcall_destroy(upcall);
            continue;
        }

        type = classify_upcall(upcall);
        if (type == MISS_UPCALL) {
            uint32_t hash;

            flow_extract(packet, flow.skb_priority, flow.pkt_mark,
                         &flow.tunnel, &flow.in_port, &miss->flow);

            hash = flow_hash(&miss->flow, 0);
            existing_miss = flow_miss_find(&fmb->misses, ofproto, &miss->flow,
                                           hash);
            if (!existing_miss) {
                hmap_insert(&fmb->misses, &miss->hmap_node, hash);
                miss->ofproto = ofproto;
                miss->key = dupcall->key;
                miss->key_len = dupcall->key_len;
                miss->upcall_type = dupcall->type;
                miss->stats.n_packets = 0;
                miss->stats.n_bytes = 0;
                miss->stats.used = time_msec();
                miss->stats.tcp_flags = 0;

                n_misses++;
            } else {
                miss = existing_miss;
            }
            miss->stats.tcp_flags |= packet_get_tcp_flags(packet, &miss->flow);
            miss->stats.n_bytes += packet->size;
            miss->stats.n_packets++;

            upcall->flow_miss = miss;
            continue;
        }

        switch (type) {
        case SFLOW_UPCALL:
            if (sflow) {
                union user_action_cookie cookie;

                memset(&cookie, 0, sizeof cookie);
                memcpy(&cookie, nl_attr_get(dupcall->userdata),
                       sizeof cookie.sflow);
                dpif_sflow_received(sflow, dupcall->packet, &flow, odp_in_port,
                                    &cookie);
            }
            break;
        case IPFIX_UPCALL:
            if (ipfix) {
                dpif_ipfix_bridge_sample(ipfix, dupcall->packet, &flow);
            }
            break;
        case FLOW_SAMPLE_UPCALL:
            if (ipfix) {
                union user_action_cookie cookie;

                memset(&cookie, 0, sizeof cookie);
                memcpy(&cookie, nl_attr_get(dupcall->userdata),
                       sizeof cookie.flow_sample);

                /* The flow reflects exactly the contents of the packet.
                 * Sample the packet using it. */
                dpif_ipfix_flow_sample(ipfix, dupcall->packet, &flow,
                                       cookie.flow_sample.collector_set_id,
                                       cookie.flow_sample.probability,
                                       cookie.flow_sample.obs_domain_id,
                                       cookie.flow_sample.obs_point_id);
            }
            break;
        case BAD_UPCALL:
            break;
        case MISS_UPCALL:
            NOT_REACHED();
        }

        dpif_ipfix_unref(ipfix);
        dpif_sflow_unref(sflow);

        list_remove(&upcall->list_node);
        upcall_destroy(upcall);
    }

    /* Initialize each 'struct flow_miss's ->xout.
     *
     * We do this per-flow_miss rather than per-packet because, most commonly,
     * all the packets in a flow can use the same translation.
     *
     * We can't do this in the previous loop because we need the TCP flags for
     * all the packets in each miss. */
    fail_open = false;
    HMAP_FOR_EACH (miss, hmap_node, &fmb->misses) {
        struct xlate_in xin;

        xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL,
                      miss->stats.tcp_flags, NULL);
        xin.may_learn = true;
        xin.resubmit_stats = &miss->stats;
        xlate_actions(&xin, &miss->xout);
        fail_open = fail_open || miss->xout.fail_open;
    }

    /* Now handle the packets individually in order of arrival.  In the common
     * case each packet of a miss can share the same actions, but slow-pathed
     * packets need to be translated individually:
     *
     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, and SLOW_BFD, translation is what
     *     processes received packets for these protocols.
     *
     *   - For SLOW_CONTROLLER, translation sends the packet to the OpenFlow
     *     controller.
     *
     * The loop fills 'ops' with an array of operations to execute in the
     * datapath. */
    n_ops = 0;
    LIST_FOR_EACH (upcall, list_node, upcalls) {
        struct flow_miss *miss = upcall->flow_miss;
        struct ofpbuf *packet = upcall->dpif_upcall.packet;

        if (miss->xout.slow) {
            struct xlate_in xin;

            xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, 0, packet);
            xlate_actions_for_side_effects(&xin);
        }

        if (miss->xout.odp_actions.size) {
            struct dpif_op *op;

            if (miss->flow.in_port.ofp_port
                != vsp_realdev_to_vlandev(miss->ofproto,
                                          miss->flow.in_port.ofp_port,
                                          miss->flow.vlan_tci)) {
                /* This packet was received on a VLAN splinter port.  We
                 * added a VLAN to the packet to make the packet resemble
                 * the flow, but the actions were composed assuming that
                 * the packet contained no VLAN.  So, we must remove the
                 * VLAN header from the packet before trying to execute the
                 * actions. */
                eth_pop_vlan(packet);
            }

            op = &ops[n_ops++];
            op->type = DPIF_OP_EXECUTE;
            op->u.execute.key = miss->key;
            op->u.execute.key_len = miss->key_len;
            op->u.execute.packet = packet;
            op->u.execute.actions = miss->xout.odp_actions.data;
            op->u.execute.actions_len = miss->xout.odp_actions.size;
            op->u.execute.needs_help = (miss->xout.slow & SLOW_ACTION) != 0;
        }
    }

    /* Execute batch. */
    for (i = 0; i < n_ops; i++) {
        opsp[i] = &ops[i];
    }
    dpif_operate(udpif->dpif, opsp, n_ops);

    /* Special case for fail-open mode.
     *
     * If we are in fail-open mode, but we are connected to a controller too,
     * then we should send the packet up to the controller in the hope that it
     * will try to set up a flow and thereby allow us to exit fail-open.
     *
     * See the top-level comment in fail-open.c for more information. */
    if (fail_open) {
        LIST_FOR_EACH (upcall, list_node, upcalls) {
            struct flow_miss *miss = upcall->flow_miss;
            struct ofpbuf *packet = upcall->dpif_upcall.packet;
            struct ofproto_packet_in *pin;

            pin = xmalloc(sizeof *pin);
            pin->up.packet = xmemdup(packet->data, packet->size);
            pin->up.packet_len = packet->size;
            pin->up.reason = OFPR_NO_MATCH;
            pin->up.table_id = 0;
            pin->up.cookie = OVS_BE64_MAX;
            flow_get_metadata(&miss->flow, &pin->up.fmd);
            pin->send_len = 0; /* Not used for flow table misses. */
            pin->generated_by_table_miss = false;
            ofproto_dpif_send_packet_in(miss->ofproto, pin);
        }
    }

    list_move(&fmb->upcalls, upcalls);

    if (fmb->reval_seq != seq_read(udpif->reval_seq)) {
        COVERAGE_INC(fmb_queue_revalidated);
        flow_miss_batch_destroy(fmb);
    } else if (!guarded_list_push_back(&udpif->fmbs, &fmb->list_node,
                                       MAX_QUEUE_LENGTH)) {
        COVERAGE_INC(fmb_queue_overflow);
        flow_miss_batch_destroy(fmb);
    } else {
        seq_change(udpif->wait_seq);
    }
}