ovs/ofproto/ofproto-dpif-upcall.c

/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.  */

#include <config.h>
#include "ofproto-dpif-upcall.h"

#include <errno.h>
#include <stdbool.h>
#include <inttypes.h>

#include "connmgr.h"
#include "coverage.h"
#include "cmap.h"
#include "lib/dpif-provider.h"
#include "dpif.h"
#include "openvswitch/dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "latch.h"
#include "openvswitch/list.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-xlate.h"
#include "ofproto-dpif-xlate-cache.h"
#include "ofproto-dpif-trace.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
#include "seq.h"
#include "tunnel.h"
#include "unixctl.h"
#include "openvswitch/usdt-probes.h"
#include "openvswitch/vlog.h"
#include "lib/netdev-provider.h"

/* Presumably the maximum number of upcalls handled in one batch; the code
 * that consumes it is outside this chunk -- TODO confirm. */
#define UPCALL_MAX_BATCH 64
/* Presumably the maximum number of flows revalidated in one batch; the code
 * that consumes it is outside this chunk -- TODO confirm. */
#define REVALIDATE_MAX_BATCH 50
/* Three quarters of UINT64_MAX.  The division is done first so that the
 * multiplication cannot overflow. */
#define UINT64_THREE_QUARTERS (UINT64_MAX / 4 * 3)

/* Logging module used by the VLOG_*() calls in this file. */
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);

/* Coverage counters defined by this file.  The places that bump them are
 * outside this chunk; the names indicate what each one counts. */
COVERAGE_DEFINE(dumped_duplicate_flow);
COVERAGE_DEFINE(dumped_inconsistent_flow);
COVERAGE_DEFINE(dumped_new_flow);
COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(revalidate_missed_dp_flow);
COVERAGE_DEFINE(revalidate_missing_dp_flow);
COVERAGE_DEFINE(ukey_dp_change);
COVERAGE_DEFINE(ukey_invalid_stat_reset);
COVERAGE_DEFINE(ukey_replace_contention);
COVERAGE_DEFINE(upcall_flow_limit_grew);
COVERAGE_DEFINE(upcall_flow_limit_hit);
COVERAGE_DEFINE(upcall_flow_limit_kill);
COVERAGE_DEFINE(upcall_flow_limit_reduced);
COVERAGE_DEFINE(upcall_flow_limit_scaled);
COVERAGE_DEFINE(upcall_ukey_contention);
COVERAGE_DEFINE(upcall_ukey_replace);

/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache.
 *
 * One 'struct handler' exists per handler thread.  udpif_start_threads()
 * allocates them as the 'handlers' array of the parent udpif and starts each
 * thread running udpif_upcall_handler(). */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id: the index of this entry
                                        * in the parent's 'handlers' array. */
};

/* In the absence of a multiple-writer multiple-reader data structure for
 * storing udpif_keys ("ukeys"), we use a large number of cmaps, each with its
 * own lock for writing.
 *
 * udpif_create() allocates and initializes N_UMAPS of these per udpif, and
 * udpif_destroy() tears them down again. */
#define N_UMAPS 512 /* per udpif. */
struct umap {
    struct ovs_mutex mutex;            /* Take for writing to the following. */
    struct cmap cmap;                  /* Datapath flow keys. */
};

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep". In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 *     During the dump phase, revalidators fetch flows from the datapath and
 *     attribute the statistics to OpenFlow rules. Each datapath flow has a
 *     corresponding ukey which caches the most recently seen statistics. If
 *     a flow needs to be deleted (for example, because it is unused over a
 *     period of time), revalidator threads may delete the flow during the
 *     dump phase. The datapath is not guaranteed to reliably dump all flows
 *     from the datapath, and there is no mapping between datapath flows to
 *     revalidators, so a particular flow may be handled by zero or more
 *     revalidators during a single dump phase. To avoid duplicate attribution
 *     of statistics, ukeys are never deleted during this phase.
 *
 *     During the sweep phase, each revalidator takes ownership of a different
 *     slice of umaps and sweeps through all ukeys in those umaps to figure out
 *     whether they need to be deleted. During this phase, revalidators may
 *     fetch individual flows which were not dumped during the dump phase to
 *     validate them and attribute statistics.
 *
 * One 'struct revalidator' exists per revalidator thread; they live in the
 * zero-initialized 'revalidators' array allocated by udpif_start_threads().
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self().  Not assigned
                                        * by udpif_start_threads(); presumably
                                        * filled in by the thread itself --
                                        * TODO confirm. */
};

/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kinds of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintain
 *      it.
 *
 * Instances are created by udpif_create(), which links them into the
 * file-level 'all_udpifs' list, and freed by udpif_destroy().
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    uint32_t n_handlers;               /* Number of elements in 'handlers'. */

    struct revalidator *revalidators;  /* Flow revalidators. */
    uint32_t n_revalidators;           /* Number of elements in
                                        * 'revalidators'. */

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch'. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads. The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch'. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
                                       /* revalidators by main thread. */

    /* There are 'N_UMAPS' maps containing 'struct udpif_key' elements.
     *
     * During the flow dump phase, revalidators insert into these with a random
     * distribution. During the garbage collection phase, each revalidator
     * takes care of garbage collecting a slice of these maps. */
    struct umap *ukeys;

    /* Datapath flow statistics. */
    unsigned int max_n_flows;
    unsigned int avg_n_flows;

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                          conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */

    long long int offload_rebalance_time;  /* Time of last offload rebalance */
};

/* Classification of an upcall, stored in the 'type' member of
 * 'struct upcall' and produced by classify_upcall(). */
enum upcall_type {
    BAD_UPCALL,                 /* Some kind of bug somewhere. */
    MISS_UPCALL,                /* A flow miss.  */
    SLOW_PATH_UPCALL,           /* Slow path upcall.  */
    SFLOW_UPCALL,               /* sFlow sample. */
    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
    IPFIX_UPCALL,               /* Per-bridge sampling. */
    CONTROLLER_UPCALL           /* Destined for the controller. */
};

/* Outcome of revalidating a ukey.  The code that produces and consumes these
 * values is outside this chunk; the meanings below follow from the names. */
enum reval_result {
    UKEY_KEEP,                  /* Keep the flow as it is. */
    UKEY_DELETE,                /* Delete the flow. */
    UKEY_MODIFY                 /* Modify the flow. */
};

/* State for processing a single upcall: the packet and flow it concerns, the
 * translation results, and the datapath key material it arrived with. */
struct upcall {
    struct ofproto_dpif *ofproto;  /* Parent ofproto. */
    const struct recirc_id_node *recirc; /* Recirculation context. */
    bool have_recirc_ref;                /* Reference held on recirc ctx? */

    /* The flow and packet are only required to be constant when using
     * dpif-netdev.  If a modification is absolutely necessary, a const cast
     * may be used with other datapaths. */
    const struct flow *flow;       /* Parsed representation of the packet. */
    enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
    const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
    unsigned pmd_id;               /* Datapath poll mode driver id. */
    const struct dp_packet *packet;   /* Packet associated with this upcall. */
    ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
    uint16_t mru;                  /* If !0, Maximum receive unit of
                                      fragmented IP packet */
    uint64_t hash;
    uint32_t pid;                  /* Socket PID this upcall was received from,
                                    * or zero. */

    enum upcall_type type;         /* Type of the upcall. */
    const struct nlattr *actions;  /* Flow actions in DPIF_UC_ACTION Upcalls. */

    bool xout_initialized;         /* True if 'xout' must be uninitialized. */
    struct xlate_out xout;         /* Result of xlate_actions(). */
    struct ofpbuf odp_actions;     /* Datapath actions from xlate_actions(). */
    struct flow_wildcards wc;      /* Dependencies that megaflow must match. */
    struct ofpbuf put_actions;     /* Actions 'put' in the fastpath. */

    struct dpif_ipfix *ipfix;      /* IPFIX pointer or NULL. */
    struct dpif_sflow *sflow;      /* SFlow pointer or NULL. */

    struct udpif_key *ukey;        /* Revalidator flow cache. */
    bool ukey_persists;            /* Set true to keep 'ukey' beyond the
                                      lifetime of this upcall. */

    uint64_t reval_seq;            /* udpif->reval_seq at translation time. */

    /* Not used by the upcall callback interface. */
    const struct nlattr *key;      /* Datapath flow key. */
    size_t key_len;                /* Datapath flow key length. */
    const struct nlattr *out_tun_key;  /* Datapath output tunnel key. */

    struct user_action_cookie cookie;

    /* 1024 bytes of storage, expressed as 64-bit words. */
    uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
};

/* Ukeys must transition through these states using transition_ukey().
 *
 * The enumerators are listed in lifecycle order, starting from UKEY_CREATED
 * (value 0) and ending with UKEY_DELETED. */
enum ukey_state {
    UKEY_CREATED = 0,
    UKEY_VISIBLE,       /* Ukey is in umap, datapath flow install is queued. */
    UKEY_OPERATIONAL,   /* Ukey is in umap, datapath flow is installed. */
    UKEY_INCONSISTENT,  /* Ukey is in umap, datapath flow is inconsistent. */
    UKEY_EVICTING,      /* Ukey is in umap, datapath flow delete is queued. */
    UKEY_EVICTED,       /* Ukey is in umap, datapath flow is deleted. */
    UKEY_DELETED,       /* Ukey removed from umap, ukey free is deferred. */
};
/* Number of states; relies on UKEY_DELETED being the last enumerator. */
#define N_UKEY_STATES (UKEY_DELETED + 1)

/* Ukey delete reasons used by USDT probes.  Please keep in sync with the
 * definition in utilities/usdt-scripts/flow_reval_monitor.py.
 *
 * Because the numeric values are shared with that script, new reasons
 * presumably have to be appended rather than inserted -- TODO confirm against
 * the script. */
enum flow_del_reason {
    FDR_NONE = 0,           /* No delete reason specified. */
    FDR_AVOID_CACHING,      /* Cache avoidance flag set. */
    FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
    FDR_FLOW_IDLE,          /* Flow idle timeout. */
    FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
    FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
    FDR_NO_OFPROTO,         /* Bridge not found. */
    FDR_PURGE,              /* User requested flow deletion. */
    FDR_TOO_EXPENSIVE,      /* Too expensive to revalidate. */
    FDR_UPDATE_FAIL,        /* Datapath update failed. */
    FDR_XLATION_ERROR,      /* Flow translation error. */
    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
};

/* 'udpif_key's are responsible for tracking the little bit of state udpif
 * needs to do flow expiration which can't be pulled directly from the
 * datapath.  They may be created by any handler or revalidator thread at any
 * time, and read by any revalidator during the dump phase. They are however
 * each owned by a single revalidator which takes care of destroying them
 * during the garbage-collection phase.
 *
 * The mutex within the ukey protects some members of the ukey. The ukey
 * itself is protected by RCU and is held within a umap in the parent udpif.
 * Adding or removing a ukey from a umap is only safe when holding the
 * corresponding umap lock. */
struct udpif_key {
    struct cmap_node cmap_node;     /* In parent revalidator 'ukeys' map. */

    /* These elements are read only once created, and therefore aren't
     * protected by a mutex. */
    const struct nlattr *key;      /* Datapath flow key. */
    size_t key_len;                /* Length of 'key'. */
    const struct nlattr *mask;     /* Datapath flow mask. */
    size_t mask_len;               /* Length of 'mask'. */
    ovs_u128 ufid;                 /* Unique flow identifier. */
    bool ufid_present;             /* True if 'ufid' is in datapath. */
    uint32_t hash;                 /* Pre-computed hash for 'key'. */
    unsigned pmd_id;               /* Datapath poll mode driver id. */

    struct ovs_mutex mutex;                   /* Guards the following. */
    struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
    const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
    long long int created OVS_GUARDED;        /* Estimate of creation time. */
    uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
    uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
    enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
    uint32_t missed_dumps OVS_GUARDED;        /* Missed consecutive dumps. */

    /* 'state' debug information. */
    unsigned int state_thread OVS_GUARDED;    /* Thread that transitions. */
    const char *state_where OVS_GUARDED;      /* transition_ukey() locator. */

    /* Datapath flow actions as nlattrs.  Protected by RCU.  Read with
     * ukey_get_actions(), and write with ukey_set_actions(). */
    OVSRCU_TYPE(struct ofpbuf *) actions;

    struct xlate_cache *xcache OVS_GUARDED;   /* Cache for xlate entries that
                                               * are affected by this ukey.
                                               * Used for stats and learning.*/
    /* Inline buffers; presumably the storage that 'key' and 'mask' above
     * point into -- TODO confirm against the ukey creation code. */
    union {
        struct odputil_keybuf buf;
        struct nlattr nla;
    } keybuf, maskbuf;

    uint32_t key_recirc_id;   /* Non-zero if reference is held by the ukey. */
    struct recirc_refs recircs;  /* Action recirc IDs with references held. */

    /* State used for dynamic offload rebalancing. */
#define OFFL_REBAL_INTVL_MSEC  3000	/* dynamic offload rebalance freq */
    struct netdev *in_netdev;		/* in_odp_port's netdev */
    bool offloaded;			/* True if flow is offloaded */
    uint64_t flow_pps_rate;		/* Packets-Per-Second rate */
    long long int flow_time;		/* last pps update time */
    uint64_t flow_packets;		/* #pkts seen in interval */
    uint64_t flow_backlog_packets;	/* prev-mode #pkts (offl or kernel) */
};

/* Datapath operation with optional ukey attached.
 *
 * Initialized by put_op_init() and delete_op_init(), both of which take the
 * ukey the operation concerns. */
struct ukey_op {
    struct udpif_key *ukey;       /* Ukey this operation is for, if any. */
    struct dpif_flow_stats stats; /* Stats for 'op'. */
    struct dpif_op dop;           /* Flow operation. */
};

/* Rate limiter shared by this file's rate-limited log messages.  The two
 * arguments are presumably (messages per minute, burst size) -- TODO confirm
 * against the vlog documentation. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
/* Every live udpif: udpif_create() appends to this list and udpif_destroy()
 * removes from it. */
static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs);

/* Forward declarations for the file-local functions defined further down. */

/* Upcall reception and thread management. */
static size_t recv_upcalls(struct handler *);
static int process_upcall(struct udpif *, struct upcall *,
                          struct ofpbuf *odp_actions, struct flow_wildcards *);
static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
static void udpif_stop_threads(struct udpif *, bool delete_flows);
static void udpif_start_threads(struct udpif *, uint32_t n_handlers,
                                uint32_t n_revalidators);
static void udpif_pause_revalidators(struct udpif *);
static void udpif_resume_revalidators(struct udpif *);
static void *udpif_upcall_handler(void *);
static void *udpif_revalidator(void *);
static unsigned long udpif_get_n_flows(struct udpif *);
static void revalidate(struct revalidator *);
static void revalidator_pause(struct revalidator *);
static void revalidator_sweep(struct revalidator *);
static void revalidator_purge(struct revalidator *);

/* unixctl command handlers, registered by udpif_init(). */
static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
                                const char *argv[], void *aux);
static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
                                             const char *argv[], void *aux);
static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
                                            const char *argv[], void *aux);
static void upcall_unixctl_disable_ufid(struct unixctl_conn *, int argc,
                                              const char *argv[], void *aux);
static void upcall_unixctl_enable_ufid(struct unixctl_conn *, int argc,
                                             const char *argv[], void *aux);

static void upcall_unixctl_set_flow_limit(struct unixctl_conn *conn, int argc,
                                            const char *argv[], void *aux);
static void upcall_unixctl_dump_wait(struct unixctl_conn *conn, int argc,
                                     const char *argv[], void *aux);
static void upcall_unixctl_purge(struct unixctl_conn *conn, int argc,
                                 const char *argv[], void *aux);
static void upcall_unixctl_pause(struct unixctl_conn *conn, int argc,
                                 const char *argv[], void *aux);
static void upcall_unixctl_resume(struct unixctl_conn *conn, int argc,
                                  const char *argv[], void *aux);

static void upcall_unixctl_ofproto_detrace(struct unixctl_conn *, int argc,
                                           const char *argv[], void *aux);

/* Ukey creation, lookup, state transitions and deletion. */
static struct udpif_key *ukey_create_from_upcall(struct upcall *,
                                                 struct flow_wildcards *);
static int ukey_create_from_dpif_flow(const struct udpif *,
                                      const struct dpif_flow *,
                                      struct udpif_key **);
static void ukey_get_actions(struct udpif_key *, const struct nlattr **actions,
                             size_t *size);
static bool ukey_install__(struct udpif *, struct udpif_key *ukey)
    OVS_TRY_LOCK(true, ukey->mutex);
static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey);
static void transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
                               const char *where)
    OVS_REQUIRES(ukey->mutex);
/* Wrapper that passes the call site's source location as 'where'. */
#define transition_ukey(UKEY, DST) \
    transition_ukey_at(UKEY, DST, OVS_SOURCE_LOCATOR)
static struct udpif_key *ukey_lookup(struct udpif *udpif,
                                     const ovs_u128 *ufid,
                                     const unsigned pmd_id);
static int ukey_acquire(struct udpif *, const struct dpif_flow *,
                        struct udpif_key **result, int *error);
static void ukey_delete__(struct udpif_key *);
static void ukey_delete(struct umap *, struct udpif_key *);
static enum upcall_type classify_upcall(enum dpif_upcall_type type,
                                        const struct nlattr *userdata,
                                        struct user_action_cookie *cookie);

/* Initializers for 'struct ukey_op'. */
static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
                        enum dpif_flow_put_flags flags);
static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
                           struct udpif_key *ukey);

/* 'struct upcall' setup and teardown. */
static int upcall_receive(struct upcall *, const struct dpif_backer *,
                          const struct dp_packet *packet, enum dpif_upcall_type,
                          const struct nlattr *userdata, const struct flow *,
                          const unsigned int mru,
                          const ovs_u128 *ufid, const unsigned pmd_id,
                          char **errorp);
static void upcall_uninit(struct upcall *);

/* Offload rebalancing. */
static void udpif_flow_rebalance(struct udpif *udpif);
static int udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
                              enum dpif_offload_type offload_type);
static int udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
                                enum dpif_offload_type offload_type);

/* Callbacks that udpif_create() registers with the dpif; declared through
 * the dpif callback function typedefs. */
static upcall_callback upcall_cb;
static dp_purge_callback dp_purge_cb;

/* Process-wide switches, both on by default.  Presumably toggled by the
 * upcall/{enable,disable}-megaflows and upcall/{enable,disable}-ufid unixctl
 * commands -- their handlers are outside this chunk.
 *
 * Note that 'enable_ufid' here is distinct from the per-udpif 'enable_ufid'
 * member of 'struct udpif'; udpif_use_ufid() reads this file-level one. */
static atomic_bool enable_megaflows = true;
static atomic_bool enable_ufid = true;

/* Registers this module's unixctl commands.
 *
 * The registrations sit behind an ovsthread_once, so calling this function
 * repeatedly registers each command only once.  Every command is registered
 * with a NULL 'aux' pointer. */
void
udpif_init(void)
{
    /* One row per command, in registration order. */
    static const struct {
        const char *name;       /* Command name. */
        const char *usage;      /* Usage string, may be NULL. */
        int min_args;           /* Minimum number of arguments. */
        int max_args;           /* Maximum number of arguments. */
        void (*handler)(struct unixctl_conn *, int argc, const char *argv[],
                        void *aux);
    } commands[] = {
        { "upcall/show", "", 0, 0, upcall_unixctl_show },
        { "upcall/disable-megaflows", "", 0, 0,
          upcall_unixctl_disable_megaflows },
        { "upcall/enable-megaflows", "", 0, 0,
          upcall_unixctl_enable_megaflows },
        { "upcall/disable-ufid", "", 0, 0, upcall_unixctl_disable_ufid },
        { "upcall/enable-ufid", "", 0, 0, upcall_unixctl_enable_ufid },
        { "upcall/set-flow-limit", "flow-limit-number", 1, 1,
          upcall_unixctl_set_flow_limit },
        { "revalidator/wait", "", 0, 0, upcall_unixctl_dump_wait },
        { "revalidator/purge", "", 0, 0, upcall_unixctl_purge },
        { "revalidator/pause", NULL, 0, 0, upcall_unixctl_pause },
        { "revalidator/resume", NULL, 0, 0, upcall_unixctl_resume },
        { "ofproto/detrace", "UFID [pmd=PMD-ID]", 1, 2,
          upcall_unixctl_ofproto_detrace },
    };
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (!ovsthread_once_start(&once)) {
        return;
    }

    for (size_t i = 0; i < sizeof commands / sizeof commands[0]; i++) {
        unixctl_command_register(commands[i].name, commands[i].usage,
                                 commands[i].min_args, commands[i].max_args,
                                 commands[i].handler, NULL);
    }
    ovsthread_once_done(&once);
}

struct udpif *
udpif_create(struct dpif_backer *backer, struct dpif *dpif)
{
    struct udpif *udpif = xzalloc(sizeof *udpif);

    udpif->dpif = dpif;
    udpif->backer = backer;
    atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
    udpif->reval_seq = seq_create();
    udpif->dump_seq = seq_create();
    latch_init(&udpif->exit_latch);
    latch_init(&udpif->pause_latch);
    ovs_list_push_back(&all_udpifs, &udpif->list_node);
    atomic_init(&udpif->enable_ufid, false);
    atomic_init(&udpif->n_flows, 0);
    atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
    ovs_mutex_init(&udpif->n_flows_mutex);
    udpif->ukeys = xmalloc(N_UMAPS * sizeof *udpif->ukeys);
    for (int i = 0; i < N_UMAPS; i++) {
        cmap_init(&udpif->ukeys[i].cmap);
        ovs_mutex_init(&udpif->ukeys[i].mutex);
    }

    dpif_register_upcall_cb(dpif, upcall_cb, udpif);
    dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);

    return udpif;
}

/* Replies to the unixctl connections stored in 'udpif->conns' once
 * 'dump_seq' has moved past the value recorded in 'conn_seq', then releases
 * the array.
 *
 * Does nothing when no connection is waiting or when 'dump_seq' still equals
 * 'conn_seq'. */
void
udpif_run(struct udpif *udpif)
{
    if (!udpif->conns || udpif->conn_seq == seq_read(udpif->dump_seq)) {
        return;
    }

    /* 'n_conns' is a size_t, so the index is one too; this avoids a
     * signed/unsigned comparison. */
    for (size_t i = 0; i < udpif->n_conns; i++) {
        unixctl_command_reply(udpif->conns[i], NULL);
    }

    free(udpif->conns);
    udpif->conns = NULL;
    udpif->n_conns = 0;
}

void
udpif_destroy(struct udpif *udpif)
{
    udpif_stop_threads(udpif, false);

    dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif);
    dpif_register_upcall_cb(udpif->dpif, NULL, udpif);

    for (int i = 0; i < N_UMAPS; i++) {
        struct udpif_key *ukey;

        CMAP_FOR_EACH (ukey, cmap_node, &udpif->ukeys[i].cmap) {
            ukey_delete__(ukey);
        }
        cmap_destroy(&udpif->ukeys[i].cmap);
        ovs_mutex_destroy(&udpif->ukeys[i].mutex);
    }
    free(udpif->ukeys);
    udpif->ukeys = NULL;

    ovs_list_remove(&udpif->list_node);
    latch_destroy(&udpif->exit_latch);
    latch_destroy(&udpif->pause_latch);
    seq_destroy(udpif->reval_seq);
    seq_destroy(udpif->dump_seq);
    ovs_mutex_destroy(&udpif->n_flows_mutex);
    free(udpif);
}

/* Stops the handler and revalidator threads of 'udpif' and releases the
 * per-thread arrays.  A no-op when 'udpif' is NULL or has no threads.
 *
 * With 'delete_flows' set, every revalidator is purged once the threads have
 * been joined; this is what deletes the ukeys and all flows from the
 * datapath.  Without the purge, flows that remain in the datapath can end up
 * having their stats double-counted, so passing false is appropriate only if
 * OVS is about to exit anyway and existing network connections should keep
 * being forwarded afterward. */
static void
udpif_stop_threads(struct udpif *udpif, bool delete_flows)
{
    if (!udpif || (!udpif->n_handlers && !udpif->n_revalidators)) {
        return;
    }

    /* Ask every thread to exit. */
    latch_set(&udpif->exit_latch);

    /* Joining may take a long time, so do it from a quiescent state. */
    ovsrcu_quiesce_start();
    for (size_t h = 0; h < udpif->n_handlers; h++) {
        xpthread_join(udpif->handlers[h].thread, NULL);
    }
    for (size_t r = 0; r < udpif->n_revalidators; r++) {
        xpthread_join(udpif->revalidators[r].thread, NULL);
    }
    dpif_disable_upcall(udpif->dpif);
    ovsrcu_quiesce_end();

    if (delete_flows) {
        for (size_t r = 0; r < udpif->n_revalidators; r++) {
            revalidator_purge(&udpif->revalidators[r]);
        }
    }

    /* Clear the latch so that it can be reused by the next set of threads. */
    latch_poll(&udpif->exit_latch);

    ovs_barrier_destroy(&udpif->reval_barrier);
    ovs_barrier_destroy(&udpif->pause_barrier);

    free(udpif->revalidators);
    udpif->revalidators = NULL;
    udpif->n_revalidators = 0;

    free(udpif->handlers);
    udpif->handlers = NULL;
    udpif->n_handlers = 0;
}

/* Starts 'n_handlers_' handler threads and 'n_revalidators_' revalidator
 * threads for 'udpif'.
 *
 * Nothing happens when 'udpif' is NULL or 'n_revalidators_' is zero.  A zero
 * 'n_handlers_' is accepted: in that case only revalidators are started and
 * 'udpif->handlers' is left NULL. */
static void
udpif_start_threads(struct udpif *udpif, uint32_t n_handlers_,
                    uint32_t n_revalidators_)
{
    if (!udpif || !n_revalidators_) {
        return;
    }

    /* Thread creation can be slow on some systems, even hundreds of
     * milliseconds, so stay quiescent for the duration. */
    ovsrcu_quiesce_start();

    udpif->n_handlers = n_handlers_;
    udpif->n_revalidators = n_revalidators_;

    udpif->handlers = (n_handlers_
                       ? xzalloc(n_handlers_ * sizeof *udpif->handlers)
                       : NULL);
    for (uint32_t id = 0; id < n_handlers_; id++) {
        struct handler *h = &udpif->handlers[id];

        h->udpif = udpif;
        h->handler_id = id;
        h->thread = ovs_thread_create("handler", udpif_upcall_handler, h);
    }

    atomic_init(&udpif->enable_ufid, udpif->backer->rt_support.ufid);
    dpif_enable_upcall(udpif->dpif);

    /* 'pause_barrier' has one extra participant: the main thread, which
     * blocks on it in udpif_pause_revalidators() and
     * udpif_resume_revalidators(). */
    ovs_barrier_init(&udpif->reval_barrier, n_revalidators_);
    ovs_barrier_init(&udpif->pause_barrier, n_revalidators_ + 1);
    udpif->reval_exit = false;
    udpif->pause = false;
    udpif->offload_rebalance_time = time_msec();

    udpif->revalidators = xzalloc(n_revalidators_
                                  * sizeof *udpif->revalidators);
    for (uint32_t r = 0; r < n_revalidators_; r++) {
        struct revalidator *rv = &udpif->revalidators[r];

        rv->udpif = udpif;
        rv->thread = ovs_thread_create("revalidator", udpif_revalidator, rv);
    }

    ovsrcu_quiesce_end();
}

/* Pauses every revalidator of 'udpif'.  Main thread only.
 *
 * Sets 'pause_latch' and then waits on 'pause_barrier', so on return all
 * revalidators are paused; they stay that way until
 * udpif_resume_revalidators() is called.  Does nothing when the backer does
 * not have packet reception enabled. */
static void
udpif_pause_revalidators(struct udpif *udpif)
{
    if (!udpif->backer->recv_set_enable) {
        return;
    }

    latch_set(&udpif->pause_latch);
    ovs_barrier_block(&udpif->pause_barrier);
}

/* Lets the revalidators of 'udpif' run again after
 * udpif_pause_revalidators().  Main thread only.
 *
 * Clears 'pause_latch' and then joins 'pause_barrier' to release the waiting
 * revalidators.  Does nothing when the backer does not have packet reception
 * enabled. */
static void
udpif_resume_revalidators(struct udpif *udpif)
{
    if (!udpif->backer->recv_set_enable) {
        return;
    }

    latch_poll(&udpif->pause_latch);
    ovs_barrier_block(&udpif->pause_barrier);
}

/* Tells 'udpif' how many threads it should use to handle upcalls.
 * 'n_handlers_' and 'n_revalidators_' can never be zero.  'udpif''s
 * datapath handle must have packet reception enabled before starting
 * threads. */
void
udpif_set_threads(struct udpif *udpif, uint32_t n_handlers_,
                  uint32_t n_revalidators_)
{
    ovs_assert(udpif);
    uint32_t n_handlers_requested;
    uint32_t n_revalidators_requested;
    bool forced = false;

    if (dpif_number_handlers_required(udpif->dpif, &n_handlers_requested)) {
        forced = true;
        if (!n_revalidators_) {
            n_revalidators_requested = (n_handlers_requested
                                        ? n_handlers_requested
                                        : MAX(count_cpu_cores(), 2)) / 4 + 1;
        } else {
            n_revalidators_requested = n_revalidators_;
        }
    } else {
        int threads = MAX(count_cpu_cores(), 2);

        n_revalidators_requested = MAX(n_revalidators_, 0);
        n_handlers_requested = MAX(n_handlers_, 0);

        if (!n_revalidators_requested) {
            n_revalidators_requested = n_handlers_requested
                    ? MAX(threads - (int) n_handlers_requested, 1)
                    : threads / 4 + 1;
        }

        if (!n_handlers_requested) {
            n_handlers_requested = MAX(threads -
                                       (int) n_revalidators_requested, 1);
        }
    }

    if (udpif->n_handlers != n_handlers_requested
        || udpif->n_revalidators != n_revalidators_requested) {
        if (forced) {
            VLOG_INFO("Overriding n-handler-threads to %u, setting "
                      "n-revalidator-threads to %u", n_handlers_requested,
                      n_revalidators_requested);
        } else {
            VLOG_INFO("Setting n-handler-threads to %u, setting "
                      "n-revalidator-threads to %u", n_handlers_requested,
                      n_revalidators_requested);
        }
        udpif_stop_threads(udpif, true);
    }

    if (!udpif->handlers && !udpif->revalidators) {
        VLOG_INFO("Starting %u threads", n_handlers_requested +
                                         n_revalidators_requested);
        int error;
        error = dpif_handlers_set(udpif->dpif, n_handlers_requested);
        if (error) {
            VLOG_ERR("failed to configure handlers in dpif %s: %s",
                     dpif_name(udpif->dpif), ovs_strerror(error));
            return;
        }
        udpif_start_threads(udpif, n_handlers_requested,
                            n_revalidators_requested);
    }
}

/* Notifies 'udpif' that something changed which may render previous
 * xlate_actions() results invalid.
 *
 * Implemented by changing 'udpif->reval_seq', the sequence that is
 * incremented to force revalidation. */
void
udpif_revalidate(struct udpif *udpif)
{
    seq_change(udpif->reval_seq);
}

/* Returns a seq which increments every time 'udpif' pulls stats from the
 * datapath.  Callers can use this to get a sense of when might be a good time
 * to do periodic work which relies on relatively up to date statistics.
 *
 * The returned seq is 'udpif->dump_seq' itself, not a copy; it is destroyed
 * by udpif_destroy(). */
struct seq *
udpif_dump_seq(struct udpif *udpif)
{
    return udpif->dump_seq;
}

/* Accounts for 'udpif' in 'usage': increases "handlers" and "revalidators"
 * by the respective thread counts, and "udpif keys" by the number of entries
 * in each of the N_UMAPS ukey maps. */
void
udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
{
    simap_increase(usage, "handlers", udpif->n_handlers);
    simap_increase(usage, "revalidators", udpif->n_revalidators);

    for (size_t idx = 0; idx < N_UMAPS; idx++) {
        size_t n_keys = cmap_count(&udpif->ukeys[idx].cmap);

        simap_increase(usage, "udpif keys", n_keys);
    }
}

/* Flushes every flow from the single datapath behind 'udpif'.  The handler
 * and revalidator threads are stopped for the duration of the flush and then
 * restarted with the same thread counts they had before. */
void
udpif_flush(struct udpif *udpif)
{
    /* Capture the counts before stopping the threads so that the very same
     * values can be handed back to udpif_start_threads(). */
    const uint32_t saved_handlers = udpif->n_handlers;
    const uint32_t saved_revalidators = udpif->n_revalidators;

    udpif_stop_threads(udpif, true);

    dpif_flow_flush(udpif->dpif);

    udpif_start_threads(udpif, saved_handlers, saved_revalidators);
}

/* Calls udpif_flush() on every udpif linked into 'all_udpifs', thereby
 * removing all flows from all datapaths. */
static void
udpif_flush_all_datapaths(void)
{
    struct udpif *each;

    LIST_FOR_EACH (each, list_node, &all_udpifs) {
        udpif_flush(each);
    }
}

/* Returns true only if the global 'enable_ufid' switch is on and the backer
 * of 'udpif' reports UFID support at run time. */
static bool
udpif_use_ufid(struct udpif *udpif)
{
    bool globally_enabled;

    atomic_read_relaxed(&enable_ufid, &globally_enabled);
    if (!globally_enabled) {
        return false;
    }

    return udpif->backer->rt_support.ufid;
}


static unsigned long
udpif_get_n_flows(struct udpif *udpif)
{
    long long int time, now;
    unsigned long flow_count;

    now = time_msec();
    atomic_read_relaxed(&udpif->n_flows_timestamp, &time);
    if (time < now - 100 && !ovs_mutex_trylock(&udpif->n_flows_mutex)) {
        struct dpif_dp_stats stats;

        atomic_store_relaxed(&udpif->n_flows_timestamp, now);
        dpif_get_dp_stats(udpif->dpif, &stats);
        flow_count = stats.n_flows;

        if (!dpif_synced_dp_layers(udpif->dpif)) {
            /* If the dpif layer does not sync the flows, we need to include
             * the hardware offloaded flows separately. */
            uint64_t hw_flows;

            if (!dpif_get_n_offloaded_flows(udpif->dpif, &hw_flows)) {
                flow_count += hw_flows;
            }
        }

        atomic_store_relaxed(&udpif->n_flows, flow_count);
        ovs_mutex_unlock(&udpif->n_flows_mutex);
    } else {
        atomic_read_relaxed(&udpif->n_flows, &flow_count);
    }
    return flow_count;
}

/* Main function of an upcall handler thread; 'arg' is its 'struct handler'.
 *
 * Until the udpif's exit latch is set, the thread repeatedly asks
 * recv_upcalls() to read and process one batch of upcalls.  If a batch
 * contained any upcalls the thread wakes up again immediately; otherwise it
 * sleeps until the dpif has something to receive for this handler or the
 * exit latch fires. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *self = arg;
    struct udpif *udpif = self->udpif;

    for (;;) {
        if (latch_is_set(&udpif->exit_latch)) {
            break;
        }

        if (!recv_upcalls(self)) {
            dpif_recv_wait(udpif->dpif, self->handler_id);
            latch_wait(&udpif->exit_latch);
        } else {
            poll_immediate_wake();
        }
        poll_block();
    }

    return NULL;
}

/* Reads up to UPCALL_MAX_BATCH upcalls for 'handler' from the datapath.
 *
 * Each received upcall is converted to a flow, passed to upcall_receive() and
 * then to process_upcall().  Upcalls for which every step succeeds occupy a
 * slot in the batch and are handed to handle_upcalls() together; an upcall
 * that fails at any step has its resources released right away and its slot
 * is reused for the next receive.
 *
 * Returns the number of upcalls that were handed to handle_upcalls(). */
static size_t
recv_upcalls(struct handler *handler)
{
    struct udpif *udpif = handler->udpif;
    /* Per-slot storage.  Everything for slot 'i' lives at index 'i' of these
     * arrays and stays valid until the final cleanup loop below. */
    uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8];
    struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
    struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
    struct upcall upcalls[UPCALL_MAX_BATCH];
    struct flow flows[UPCALL_MAX_BATCH];
    size_t n_upcalls, i;

    n_upcalls = 0;
    while (n_upcalls < UPCALL_MAX_BATCH) {
        struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
        struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
        struct upcall *upcall = &upcalls[n_upcalls];
        struct flow *flow = &flows[n_upcalls];
        unsigned int mru = 0;
        char *errorp = NULL;
        uint64_t hash = 0;
        int error;

        ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
                        sizeof recv_stubs[n_upcalls]);
        /* A nonzero return from dpif_recv() ends the batch. */
        if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
            ofpbuf_uninit(recv_buf);
            break;
        }

        upcall->fitness = odp_flow_key_to_flow(dupcall->key, dupcall->key_len,
                                               flow, NULL);
        if (upcall->fitness == ODP_FIT_ERROR) {
            goto free_dupcall;
        }

        /* 'mru' and 'hash' are optional attributes; they stay 0 if absent. */
        if (dupcall->mru) {
            mru = nl_attr_get_u16(dupcall->mru);
        }

        if (dupcall->hash) {
            hash = nl_attr_get_u64(dupcall->hash);
        }

        error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
                               dupcall->type, dupcall->userdata, flow, mru,
                               &dupcall->ufid, PMD_ID_NULL, &errorp);
        if (error) {
            if (error == ENODEV) {
                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently. */
                /* NOTE(review): the NULL/0 arguments presumably mean "no mask,
                 * no actions", i.e. a flow that drops matching packets --
                 * confirm against dpif_flow_put(). */
                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
                              dupcall->key_len, NULL, 0, NULL, 0,
                              &dupcall->ufid, PMD_ID_NULL, NULL);
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32"%s%s%s", flow->in_port.odp_port,
                             errorp ? " (" : "", errorp ? errorp : "",
                             errorp ? ")" : "");
            }
            /* 'errorp' is freed for every error, not only ENODEV. */
            free(errorp);
            /* upcall_receive() failed, so upcall_uninit() must be skipped. */
            goto free_dupcall;
        }

        /* Fill in the fields that upcall_receive() left at their defaults. */
        upcall->key = dupcall->key;
        upcall->key_len = dupcall->key_len;
        upcall->ufid = &dupcall->ufid;
        upcall->hash = hash;
        upcall->pid = dupcall->pid;

        upcall->out_tun_key = dupcall->out_tun_key;
        upcall->actions = dupcall->actions;

        pkt_metadata_from_flow(&dupcall->packet.md, flow);
        flow_extract(&dupcall->packet, flow);

        error = process_upcall(udpif, upcall,
                               &upcall->odp_actions, &upcall->wc);
        if (error) {
            goto cleanup;
        }

        /* Success: keep this slot for handle_upcalls(). */
        n_upcalls++;
        continue;

        /* Failure paths.  'cleanup' is for upcalls that upcall_receive()
         * initialized; 'free_dupcall' releases only the received packet and
         * buffer.  The slot index is not advanced, so it is reused. */
cleanup:
        upcall_uninit(upcall);
free_dupcall:
        dp_packet_uninit(&dupcall->packet);
        ofpbuf_uninit(recv_buf);
    }

    if (n_upcalls) {
        handle_upcalls(handler->udpif, upcalls, n_upcalls);
        /* Release the per-slot resources of every batched upcall. */
        for (i = 0; i < n_upcalls; i++) {
            dp_packet_uninit(&dupcalls[i].packet);
            ofpbuf_uninit(&recv_bufs[i]);
            upcall_uninit(&upcalls[i]);
        }
    }

    return n_upcalls;
}

/* Runs udpif_flow_rebalance() on 'udpif', but only if at least
 * OFFL_REBAL_INTVL_MSEC have passed since the previous rebalance and
 * netdev_any_oor() reports an out-of-resources netdev.  A rebalance that is
 * actually started records the current time as the new reference point. */
static void
udpif_run_flow_rebalance(struct udpif *udpif)
{
    long long int now = time_msec();

    /* The interval test comes first so that netdev_any_oor() is not even
     * consulted while the interval has not elapsed. */
    if (now >= udpif->offload_rebalance_time + OFFL_REBAL_INTVL_MSEC
        && netdev_any_oor()) {
        VLOG_DBG("Offload rebalance: Found OOR netdevs");
        udpif->offload_rebalance_time = now;
        udpif_flow_rebalance(udpif);
    }
}

/* Main function of a revalidator thread; 'arg' is its 'struct revalidator'.
 *
 * Every loop iteration is one revalidation round.  All revalidator threads of
 * a udpif go through the round in lock step, synchronizing on
 * 'udpif->reval_barrier':
 *
 *   1. The leader (the thread running 'udpif->revalidators[0]') samples the
 *      flow count and the pause and exit latches and, unless pausing or
 *      exiting, creates the flow dump for this round.
 *
 *   2. All threads run revalidate() and then, once everyone has finished
 *      dumping, revalidator_sweep().
 *
 *   3. The leader destroys the dump, adapts 'udpif->flow_limit' to the time
 *      the round took, and sleeps until the next round is due, 'reval_seq'
 *      changes, or the exit or pause latch is set.
 *
 * Returns NULL once the leader has seen the exit latch set. */
static void *
udpif_revalidator(void *arg)
{
    /* Used by all revalidators. */
    struct revalidator *revalidator = arg;
    struct udpif *udpif = revalidator->udpif;
    bool leader = revalidator == &udpif->revalidators[0];

    /* Used only by the leader. */
    long long int start_time = 0;
    uint64_t last_reval_seq = 0;
    size_t n_flows = 0;

    revalidator->id = ovsthread_id_self();
    for (;;) {
        if (leader) {
            uint64_t reval_seq;

            recirc_run(); /* Recirculation cleanup. */

            /* Remember the sequence number seen at the start of the round;
             * it is what seq_wait() compares against at the end. */
            reval_seq = seq_read(udpif->reval_seq);
            last_reval_seq = reval_seq;

            n_flows = udpif_get_n_flows(udpif);
            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
            /* Running average: mean of the previous average and this
             * sample. */
            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;

            /* Only the leader checks the pause latch to prevent a race where
             * some threads think it's false and proceed to block on
             * reval_barrier and others think it's true and block indefinitely
             * on the pause_barrier */
            udpif->pause = latch_is_set(&udpif->pause_latch);

            /* Only the leader checks the exit latch to prevent a race where
             * some threads think it's true and exit and others think it's
             * false and block indefinitely on the reval_barrier */
            udpif->reval_exit = latch_is_set(&udpif->exit_latch);

            start_time = time_msec();
            if (!udpif->reval_exit && !udpif->pause) {
                bool terse_dump;

                terse_dump = udpif_use_ufid(udpif);
                udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump,
                                                    NULL);
                OVS_USDT_PROBE(udpif_revalidator, start_dump, udpif, n_flows);
            }
        }

        /* Wait for the leader to reach this point. */
        ovs_barrier_block(&udpif->reval_barrier);
        if (udpif->pause) {
            revalidator_pause(revalidator);
            if (!udpif->reval_exit) {
                /* The main thread resumed all validators, but the leader
                 * didn't start the dump, go to next iteration. */
                continue;
            }
        }

        if (udpif->reval_exit) {
            break;
        }
        revalidate(revalidator);

        /* Wait for all flows to have been dumped before we garbage collect. */
        ovs_barrier_block(&udpif->reval_barrier);
        revalidator_sweep(revalidator);

        /* Wait for all revalidators to finish garbage collection. */
        ovs_barrier_block(&udpif->reval_barrier);

        if (leader) {
            unsigned int flow_limit;
            long long int duration;

            atomic_read_relaxed(&udpif->flow_limit, &flow_limit);

            dpif_flow_dump_destroy(udpif->dump);
            seq_change(udpif->dump_seq);
            if (netdev_is_offload_rebalance_policy_enabled()) {
                udpif_run_flow_rebalance(udpif);
            }

            /* Adapt the flow limit to the duration of this round (in ms,
             * forced to at least 1 so that it can be used as a divisor):
             *
             *   - More than 2000 ms: divide the limit by the number of whole
             *     seconds taken.
             *   - More than 1300 ms: reduce the limit to 3/4.
             *   - Less than 1000 ms, and the limit is below the number of
             *     flows that would be handled per second at this pace: raise
             *     the limit by 1000.
             *
             * The result is then raised to at least 1000 and finally capped
             * at 'ofproto_flow_limit'. */
            duration = MAX(time_msec() - start_time, 1);
            udpif->dump_duration = duration;
            if (duration > 2000) {
                flow_limit /= duration / 1000;
                COVERAGE_INC(upcall_flow_limit_scaled);
            } else if (duration > 1300) {
                flow_limit = flow_limit * 3 / 4;
                COVERAGE_INC(upcall_flow_limit_reduced);
            } else if (duration < 1000 &&
                       flow_limit < n_flows * 1000 / duration) {
                flow_limit += 1000;
                COVERAGE_INC(upcall_flow_limit_grew);
            }
            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
            atomic_store_relaxed(&udpif->flow_limit, flow_limit);

            if (duration > 2000) {
                VLOG_WARN("Spent an unreasonably long %lldms dumping flows",
                          duration);
            }

            OVS_USDT_PROBE(udpif_revalidator, sweep_done, udpif, n_flows,
                           MIN(ofproto_max_idle, ofproto_max_revalidator));

            /* Sleep until the next round is due, counted from the start of
             * this one, or until 'reval_seq' moves past the value read at the
             * start of the round, or until exit or pause is requested. */
            poll_timer_wait_until(start_time + MIN(ofproto_max_idle,
                                                   ofproto_max_revalidator));
            seq_wait(udpif->reval_seq, last_reval_seq);
            latch_wait(&udpif->exit_latch);
            latch_wait(&udpif->pause_latch);
            poll_block();

            if (!latch_is_set(&udpif->pause_latch) &&
                !latch_is_set(&udpif->exit_latch)) {
                long long int now = time_msec();
                /* Block again if we are woken up within 5ms of the last start
                 * time. */
                start_time += 5;

                if (now < start_time) {
                    poll_timer_wait_until(start_time);
                    latch_wait(&udpif->exit_latch);
                    latch_wait(&udpif->pause_latch);
                    poll_block();
                }
            }
        }
    }

    return NULL;
}

static enum upcall_type
classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
                struct user_action_cookie *cookie)
{
    /* First look at the upcall type. */
    switch (type) {
    case DPIF_UC_ACTION:
        break;

    case DPIF_UC_MISS:
        return MISS_UPCALL;

    case DPIF_N_UC_TYPES:
    default:
        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, type);
        return BAD_UPCALL;
    }

    /* "action" upcalls need a closer look. */
    if (!userdata) {
        VLOG_WARN_RL(&rl, "action upcall missing cookie");
        return BAD_UPCALL;
    }

    size_t userdata_len = nl_attr_get_size(userdata);
    if (userdata_len != sizeof *cookie) {
        VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
                     userdata_len);
        return BAD_UPCALL;
    }
    memcpy(cookie, nl_attr_get(userdata), sizeof *cookie);
    if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
        return SFLOW_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
        return SLOW_PATH_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
        return FLOW_SAMPLE_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_IPFIX) {
        return IPFIX_UPCALL;
    } else if (cookie->type == USER_ACTION_COOKIE_CONTROLLER) {
        return CONTROLLER_UPCALL;
    } else {
        VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
                     " and size %"PRIuSIZE, cookie->type, userdata_len);
        return BAD_UPCALL;
    }
}

/* Appends the slow path actions for 'xout' to 'buf'.  'buf' must statically
 * be initialized with at least 128 bytes of space.
 *
 * The result is a userspace action carrying a USER_ACTION_COOKIE_SLOW_PATH
 * cookie built from 'ofp_in_port', '*ofproto_uuid' and 'xout->slow'.  If
 * 'meter_id' is anything other than UINT32_MAX, that action is nested,
 * preceded by a meter action, inside a sample action whose probability is
 * UINT32_MAX. */
static void
compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
                  odp_port_t odp_in_port, ofp_port_t ofp_in_port,
                  struct ofpbuf *buf, uint32_t meter_id,
                  struct uuid *ofproto_uuid)
{
    const bool metered = meter_id != UINT32_MAX;
    struct user_action_cookie cookie;
    size_t sample_ofs = 0;
    size_t actions_ofs = 0;
    odp_port_t pid_port;
    uint32_t pid;

    memset(&cookie, 0, sizeof cookie);
    cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
    cookie.ofp_in_port = ofp_in_port;
    cookie.ofproto_uuid = *ofproto_uuid;
    cookie.slow_path.reason = xout->slow;

    /* For CFM, BFD, LACP and STP the upcall pid is looked up for ODPP_NONE
     * rather than for the actual input port. */
    if (xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
        pid_port = ODPP_NONE;
    } else {
        pid_port = odp_in_port;
    }
    pid = dpif_port_get_pid(udpif->dpif, pid_port);

    if (metered) {
        /* A slow path meter is configured: generate
         * clone(meter, userspace). */
        sample_ofs = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SAMPLE);
        nl_msg_put_u32(buf, OVS_SAMPLE_ATTR_PROBABILITY, UINT32_MAX);
        actions_ofs = nl_msg_start_nested(buf, OVS_SAMPLE_ATTR_ACTIONS);
        nl_msg_put_u32(buf, OVS_ACTION_ATTR_METER, meter_id);
    }

    odp_put_userspace_action(pid, &cookie, sizeof cookie,
                             ODPP_NONE, false, buf, NULL);

    if (metered) {
        /* Close the nested attributes, innermost first. */
        nl_msg_end_nested(buf, actions_ofs);
        nl_msg_end_nested(buf, sample_ofs);
    }
}

/* If there is no error, the upcall must be destroyed with upcall_uninit()
 * before quiescing, as the referred objects are guaranteed to exist only
 * until the calling thread quiesces.  Otherwise, do not call upcall_uninit()
 * since the 'upcall->put_actions' remains uninitialized. */
static int
upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
               const struct dp_packet *packet, enum dpif_upcall_type type,
               const struct nlattr *userdata, const struct flow *flow,
               const unsigned int mru,
               const ovs_u128 *ufid, const unsigned pmd_id,
               char **errorp)
{
    int error;

    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
    if (upcall->type == BAD_UPCALL) {
        return EAGAIN;
    } else if (upcall->type == MISS_UPCALL) {
        error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
                             &upcall->sflow, NULL, &upcall->ofp_in_port,
                             errorp);
        if (error) {
            return error;
        }
    } else {
        struct ofproto_dpif *ofproto
            = ofproto_dpif_lookup_by_uuid(&upcall->cookie.ofproto_uuid);
        if (!ofproto) {
            if (errorp) {
                *errorp = xstrdup("upcall could not find ofproto");
            } else {
                VLOG_INFO_RL(&rl, "upcall could not find ofproto");
            }
            return ENODEV;
        }
        upcall->ofproto = ofproto;
        upcall->ipfix = ofproto->ipfix;
        upcall->sflow = ofproto->sflow;
        upcall->ofp_in_port = upcall->cookie.ofp_in_port;
    }

    upcall->recirc = NULL;
    upcall->have_recirc_ref = false;
    upcall->flow = flow;
    upcall->packet = packet;
    upcall->ufid = ufid;
    upcall->pmd_id = pmd_id;
    ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
                    sizeof upcall->odp_actions_stub);
    ofpbuf_init(&upcall->put_actions, 0);

    upcall->xout_initialized = false;
    upcall->ukey_persists = false;

    upcall->ukey = NULL;
    upcall->key = NULL;
    upcall->key_len = 0;
    upcall->mru = mru;
    upcall->pid = 0;

    upcall->out_tun_key = NULL;
    upcall->actions = NULL;

    return 0;
}

/* Runs flow translation for 'upcall' and fills in the parts of 'upcall' that
 * depend on the result.
 *
 * 'odp_actions' and 'wc' are handed to xlate_in_init() as the translation's
 * output buffers; 'wc' may be NULL.  On return:
 *
 *   - 'upcall->xout' holds the translation output and
 *     'upcall->xout_initialized' is true.
 *
 *   - 'upcall->put_actions' either refers to the contents of 'odp_actions'
 *     (no slow path needed) or contains the actions written by
 *     compose_slow_path().
 *
 *   - For a MISS_UPCALL, 'upcall->ukey' holds the result of
 *     ukey_create_from_upcall(), and 'upcall->recirc' and
 *     'upcall->have_recirc_ref' describe the recirculation context if the
 *     translation input has frozen state. */
static void
upcall_xlate(struct udpif *udpif, struct upcall *upcall,
             struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
    struct dpif_flow_stats stats;
    enum xlate_error xerr;
    struct xlate_in xin;
    struct ds output;

    /* Statistics for exactly one packet, the one that caused the upcall.
     * They are only attached to the translation for miss upcalls, below. */
    stats.n_packets = 1;
    stats.n_bytes = dp_packet_size(upcall->packet);
    stats.used = time_msec();
    stats.tcp_flags = ntohs(upcall->flow->tcp_flags);

    xlate_in_init(&xin, upcall->ofproto,
                  ofproto_dpif_get_tables_version(upcall->ofproto),
                  upcall->flow, upcall->ofp_in_port, NULL,
                  stats.tcp_flags, upcall->packet, wc, odp_actions);

    if (upcall->type == MISS_UPCALL) {
        xin.resubmit_stats = &stats;

        if (xin.frozen_state) {
            /* We may install a datapath flow only if we get a reference to the
             * recirculation context (otherwise we could have recirculation
             * upcalls using recirculation ID for which no context can be
             * found).  We may still execute the flow's actions even if we
             * don't install the flow. */
            upcall->recirc = recirc_id_node_from_state(xin.frozen_state);
            upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(upcall->recirc);
        }
    } else {
        /* For non-miss upcalls, we are either executing actions (one of which
         * is an userspace action) for an upcall, in which case the stats have
         * already been taken care of, or there's a flow in the datapath which
         * this packet was accounted to.  Presumably the revalidators will deal
         * with pushing its stats eventually. */
    }

    /* The sequence number is sampled before translating.
     * NOTE(review): presumably so that a change happening during translation
     * is noticed later -- confirm against the users of 'reval_seq'. */
    upcall->reval_seq = seq_read(udpif->reval_seq);

    xerr = xlate_actions(&xin, &upcall->xout);

    /* Translate again and log the ofproto trace for
     * these two error types. */
    if (xerr == XLATE_RECURSION_TOO_DEEP ||
        xerr == XLATE_TOO_MANY_RESUBMITS) {
        static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 1);

        /* This is a huge log, so be conservative. */
        if (!VLOG_DROP_WARN(&rll)) {
            ds_init(&output);
            ofproto_trace(upcall->ofproto, upcall->flow,
                          upcall->packet, NULL, 0, NULL, &output,
                          false);
            VLOG_WARN("%s", ds_cstr(&output));
            ds_destroy(&output);
        }
    }

    if (wc) {
        /* Convert the input port wildcard from OFP to ODP format. There's no
         * real way to do this for arbitrary bitmasks since the numbering spaces
         * aren't the same. However, flow translation always exact matches the
         * whole thing, so we can do the same here. */
        WC_MASK_FIELD(wc, in_port.odp_port);
    }

    /* From here on upcall_uninit() has to release 'upcall->xout'. */
    upcall->xout_initialized = true;

    /* A key that carried too little information forces the slow path. */
    if (upcall->fitness == ODP_FIT_TOO_LITTLE) {
        upcall->xout.slow |= SLOW_MATCH;
    }
    if (!upcall->xout.slow) {
        /* Fast path: 'put_actions' just points at the translated actions. */
        ofpbuf_use_const(&upcall->put_actions,
                         odp_actions->data, odp_actions->size);
    } else {
        /* upcall->put_actions already initialized by upcall_receive(). */
        compose_slow_path(udpif, &upcall->xout,
                          upcall->flow->in_port.odp_port, upcall->ofp_in_port,
                          &upcall->put_actions,
                          upcall->ofproto->up.slowpath_meter_id,
                          &upcall->ofproto->uuid);
    }

    /* This function is also called for slow-pathed flows.  As we are only
     * going to create new datapath flows for actual datapath misses, there is
     * no point in creating a ukey otherwise. */
    if (upcall->type == MISS_UPCALL) {
        upcall->ukey = ukey_create_from_upcall(upcall, wc);
    }
}

/* Releases everything 'upcall' owns.  Does nothing if 'upcall' is NULL.
 *
 * The translation output is released only if it was initialized.  A ukey is
 * deleted unless it has been marked as persisting.  The recirculation
 * reference is dropped only when no ukey exists, because creating a ukey
 * transfers the reference to it. */
static void
upcall_uninit(struct upcall *upcall)
{
    if (!upcall) {
        return;
    }

    if (upcall->xout_initialized) {
        xlate_out_uninit(&upcall->xout);
    }
    ofpbuf_uninit(&upcall->odp_actions);
    ofpbuf_uninit(&upcall->put_actions);

    if (!upcall->ukey) {
        /* No ukey was created, so the reference (if any) is still ours. */
        if (upcall->have_recirc_ref) {
            recirc_id_node_unref(upcall->recirc);
        }
    } else if (!upcall->ukey_persists) {
        ukey_delete__(upcall->ukey);
    }
}

/* Decides whether a datapath flow should be installed for 'upcall'.
 *
 * Returns true only if all of the following hold:
 *
 *      - 'upcall' is a miss upcall,
 *      - it either has no recirculation context, or a reference to that
 *        context was obtained, and
 *      - the datapath currently holds fewer flows than 'udpif->flow_limit'.
 *
 * Otherwise returns false. */
static bool
should_install_flow(struct udpif *udpif, struct upcall *upcall)
{
    unsigned int flow_limit;

    if (upcall->type != MISS_UPCALL) {
        return false;
    }

    if (upcall->recirc && !upcall->have_recirc_ref) {
        VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
        return false;
    }

    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    if (udpif_get_n_flows(udpif) < flow_limit) {
        return true;
    }

    COVERAGE_INC(upcall_flow_limit_hit);
    VLOG_WARN_RL(&rl,
                 "upcall: datapath reached the dynamic limit of %u flows.",
                 flow_limit);
    return false;
}

/* Upcall callback; 'aux' is the 'struct udpif' the upcall belongs to.
 *
 * Builds an upcall from the arguments, processes it with 'actions' and 'wc'
 * as translation outputs and, if a flow should be installed, installs the
 * upcall's ukey.  When the translation took the slow path and 'put_actions'
 * is nonnull, the slow path actions are appended to 'put_actions'.  With
 * megaflows disabled, a nonnull 'wc' is reinitialized for the packet.
 *
 * Returns 0 on success.  Otherwise returns the error from upcall_receive()
 * or process_upcall(), or ENOSPC if the flow should not be installed or the
 * ukey could not be installed. */
static int
upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
          unsigned pmd_id, enum dpif_upcall_type type,
          const struct nlattr *userdata, struct ofpbuf *actions,
          struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
{
    struct udpif *udpif = aux;
    struct upcall upcall;
    bool use_megaflows;
    int error;

    atomic_read_relaxed(&enable_megaflows, &use_megaflows);

    error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
                           flow, 0, ufid, pmd_id, NULL);
    if (error) {
        /* Nothing was initialized, so there is nothing to uninit. */
        return error;
    }

    upcall.fitness = ODP_FIT_PERFECT;
    error = process_upcall(udpif, &upcall, actions, wc);
    if (!error) {
        if (upcall.xout.slow && put_actions) {
            ofpbuf_put(put_actions, upcall.put_actions.data,
                       upcall.put_actions.size);
        }

        if (OVS_UNLIKELY(!use_megaflows && wc)) {
            flow_wildcards_init_for_packet(wc, flow);
        }

        if (!should_install_flow(udpif, &upcall)
            || (upcall.ukey && !ukey_install(udpif, upcall.ukey))) {
            error = ENOSPC;
        } else {
            /* Success: the ukey must survive upcall_uninit(). */
            upcall.ukey_persists = true;
        }
    }

    upcall_uninit(&upcall);
    return error;
}

/* Finds the datapath actions that belong to 'upcall'.
 *
 * Actions passed up by the datapath in 'upcall->actions' are used if they
 * are present and nonempty.  Otherwise the ukey matching the upcall's UFID
 * and PMD id is looked up and, if found, its actions are used.
 *
 * Stores a pointer to the actions in '*actions' and returns their length in
 * bytes.  Returns 0 if no actions were found; '*actions' is then left as the
 * called helpers left it. */
static size_t
dpif_get_actions(struct udpif *udpif, struct upcall *upcall,
                 const struct nlattr **actions)
{
    const struct nlattr *from_datapath = upcall->actions;
    size_t len = 0;

    if (from_datapath) {
        /* Actions were passed up from datapath. */
        *actions = nl_attr_get(from_datapath);
        len = nl_attr_get_size(from_datapath);
    }

    if (!len) {
        /* Fall back to the userspace cache. */
        struct udpif_key *cached = ukey_lookup(udpif, upcall->ufid,
                                               upcall->pmd_id);

        if (cached) {
            ukey_get_actions(cached, actions, &len);
        }
    }

    return len;
}

/* Obtains the datapath actions for 'upcall' with dpif_get_actions() and lets
 * the sFlow or IPFIX module digest them into 'upcall_data'.
 *
 * 'type' selects the consumer: SFLOW_UPCALL goes to
 * dpif_sflow_read_actions(), FLOW_SAMPLE_UPCALL and IPFIX_UPCALL go to
 * dpif_ipfix_read_actions().  For any other type 'upcall_data' is left
 * untouched.
 *
 * Returns the length of the actions in bytes, or 0 if there are none. */
static size_t
dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
                  const struct flow *flow, enum upcall_type type,
                  void *upcall_data)
{
    const struct nlattr *actions = NULL;
    size_t actions_len = dpif_get_actions(udpif, upcall, &actions);

    if (!actions_len || !actions) {
        return 0;
    }

    if (type == SFLOW_UPCALL) {
        dpif_sflow_read_actions(flow, actions, actions_len, upcall_data, true);
    } else if (type == FLOW_SAMPLE_UPCALL || type == IPFIX_UPCALL) {
        dpif_ipfix_read_actions(flow, actions, actions_len, upcall_data);
    }

    return actions_len;
}

/* Processes 'upcall' according to its type.
 *
 *   - MISS_UPCALL, SLOW_PATH_UPCALL: the flow is translated with
 *     upcall_xlate(), using 'odp_actions' and 'wc' as outputs.
 *
 *   - SFLOW_UPCALL: the packet is passed to the sFlow module, if the bridge
 *     has one.
 *
 *   - IPFIX_UPCALL, FLOW_SAMPLE_UPCALL: the packet is passed to the IPFIX
 *     module, if the bridge has one, as a bridge sample or a flow sample
 *     respectively.
 *
 *   - CONTROLLER_UPCALL: a packet-in message is built from the frozen state
 *     of the recirculation context named in the cookie and queued for the
 *     controller.
 *
 * Returns 0 if the upcall was translated, or if it is a controller upcall
 * whose cookie says not to send anything.  Returns EINVAL if an IPFIX or
 * flow sample upcall carries an output tunnel key that cannot be parsed.
 * Returns EAGAIN in all other cases, i.e. whenever the upcall has been dealt
 * with completely inside this function. */
static int
process_upcall(struct udpif *udpif, struct upcall *upcall,
               struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
    const struct dp_packet *packet = upcall->packet;
    const struct flow *flow = upcall->flow;
    size_t actions_len = 0;

    switch (upcall->type) {
    case MISS_UPCALL:
    case SLOW_PATH_UPCALL:
        upcall_xlate(udpif, upcall, odp_actions, wc);
        return 0;

    case SFLOW_UPCALL:
        if (upcall->sflow) {
            struct dpif_sflow_actions sflow_actions;

            memset(&sflow_actions, 0, sizeof sflow_actions);

            actions_len = dpif_read_actions(udpif, upcall, flow,
                                            upcall->type, &sflow_actions);
            /* Only pass the digested actions on if there were any. */
            dpif_sflow_received(upcall->sflow, packet, flow,
                                flow->in_port.odp_port, &upcall->cookie,
                                actions_len > 0 ? &sflow_actions : NULL);
        }
        break;

    case IPFIX_UPCALL:
    case FLOW_SAMPLE_UPCALL:
        if (upcall->ipfix) {
            struct flow_tnl output_tunnel_key;
            struct dpif_ipfix_actions ipfix_actions;

            memset(&ipfix_actions, 0, sizeof ipfix_actions);

            if (upcall->out_tun_key) {
                /* Give up on a tunnel key that cannot be parsed.
                 * 'output_tunnel_key' must not be handed to the IPFIX module
                 * unless parsing succeeded. */
                if (odp_tun_key_from_attr(upcall->out_tun_key,
                                          &output_tunnel_key,
                                          NULL) == ODP_FIT_ERROR) {
                    return EINVAL;
                }
            }

            actions_len = dpif_read_actions(udpif, upcall, flow,
                                            upcall->type, &ipfix_actions);
            if (upcall->type == IPFIX_UPCALL) {
                dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
                                         flow->in_port.odp_port,
                                         upcall->cookie.ipfix.output_odp_port,
                                         upcall->out_tun_key ?
                                             &output_tunnel_key : NULL,
                                         actions_len > 0 ?
                                             &ipfix_actions: NULL);
            } else {
                /* The flow reflects exactly the contents of the packet.
                 * Sample the packet using it. */
                dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
                                       &upcall->cookie, flow->in_port.odp_port,
                                       upcall->out_tun_key ?
                                           &output_tunnel_key : NULL,
                                       actions_len > 0 ? &ipfix_actions: NULL);
            }
        }
        break;

    case CONTROLLER_UPCALL:
        {
            struct user_action_cookie *cookie = &upcall->cookie;

            if (cookie->controller.dont_send) {
                return 0;
            }

            /* Without a recirculation context there is no frozen state to
             * build the packet-in from. */
            uint32_t recirc_id = cookie->controller.recirc_id;
            if (!recirc_id) {
                break;
            }

            const struct recirc_id_node *recirc_node
                                = recirc_id_node_find(recirc_id);
            if (!recirc_node) {
                break;
            }

            const struct frozen_state *state = &recirc_node->state;

            /* The message gets its own copies of the packet and of the
             * userdata. */
            struct ofproto_async_msg *am = xmalloc(sizeof *am);
            *am = (struct ofproto_async_msg) {
                .controller_id = cookie->controller.controller_id,
                .oam = OAM_PACKET_IN,
                .pin = {
                    .up = {
                        .base = {
                            .packet = xmemdup(dp_packet_data(packet),
                                              dp_packet_size(packet)),
                            .packet_len = dp_packet_size(packet),
                            .reason = cookie->controller.reason,
                            .table_id = state->table_id,
                            .cookie = get_32aligned_be64(
                                         &cookie->controller.rule_cookie),
                            .userdata = (recirc_node->state.userdata_len
                                     ? xmemdup(recirc_node->state.userdata,
                                               recirc_node->state.userdata_len)
                                      : NULL),
                            .userdata_len = recirc_node->state.userdata_len,
                        },
                    },
                    .max_len = cookie->controller.max_len,
                },
            };

            if (cookie->controller.continuation) {
                /* A continuation additionally carries copies of the frozen
                 * translation state. */
                am->pin.up.stack = (state->stack_size
                          ? xmemdup(state->stack, state->stack_size)
                          : NULL);
                am->pin.up.stack_size = state->stack_size;
                am->pin.up.mirrors = state->mirrors;
                am->pin.up.conntracked = state->conntracked;
                am->pin.up.actions = (state->ofpacts_len
                            ? xmemdup(state->ofpacts,
                                      state->ofpacts_len) : NULL);
                am->pin.up.actions_len = state->ofpacts_len;
                am->pin.up.action_set = (state->action_set_len
                               ? xmemdup(state->action_set,
                                         state->action_set_len)
                               : NULL);
                am->pin.up.action_set_len = state->action_set_len;
                am->pin.up.bridge = upcall->ofproto->uuid;
                am->pin.up.odp_port = upcall->packet->md.in_port.odp_port;
            }

            /* We don't want to use the upcall 'flow', since it may be
             * more specific than the point at which the "controller"
             * action was specified. */
            struct flow frozen_flow;

            frozen_flow = *flow;
            if (!state->conntracked) {
                flow_clear_conntrack(&frozen_flow);
            }

            frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
                                    &frozen_flow);
            flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);

            ofproto_dpif_send_async_msg(upcall->ofproto, am);
        }
        break;

    case BAD_UPCALL:
        break;
    }

    return EAGAIN;
}

/* Installs datapath flows and executes packets for the 'n_upcalls' upcalls in
 * 'upcalls', then updates the state of every ukey for which a flow put was
 * attempted.
 *
 * 'n_upcalls' must not exceed UPCALL_MAX_BATCH: each upcall can queue up to
 * two datapath operations (one flow put and one packet execute) and the
 * on-stack operation arrays are sized for exactly that. */
static void
handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
               size_t n_upcalls)
{
    struct dpif_op *opsp[UPCALL_MAX_BATCH * 2];
    struct ukey_op ops[UPCALL_MAX_BATCH * 2];
    size_t n_ops, n_opsp, i;

    /* Guard the fixed-size arrays above, in the same way push_dp_ops()
     * guards its own batch. */
    ovs_assert(n_upcalls <= UPCALL_MAX_BATCH);

    /* Handle the packets individually in order of arrival.
     *
     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, SLOW_BFD, and SLOW_LLDP,
     *     translation is what processes received packets for these
     *     protocols.
     *
     *   - For SLOW_ACTION, translation executes the actions directly.
     *
     * The loop fills 'ops' with an array of operations to execute in the
     * datapath. */
    n_ops = 0;
    for (i = 0; i < n_upcalls; i++) {
        struct upcall *upcall = &upcalls[i];
        const struct dp_packet *packet = upcall->packet;
        struct ukey_op *op;

        if (should_install_flow(udpif, upcall)) {
            struct udpif_key *ukey = upcall->ukey;

            /* Only queue a flow put if the ukey made it into the umap. */
            if (ukey_install(udpif, ukey)) {
                upcall->ukey_persists = true;
                put_op_init(&ops[n_ops++], ukey, DPIF_FP_CREATE);
            }
        }

        if (upcall->odp_actions.size) {
            /* Execute ops carry no ukey, so the post-processing loop below
             * skips them. */
            op = &ops[n_ops++];
            op->ukey = NULL;
            op->dop.type = DPIF_OP_EXECUTE;
            op->dop.execute.packet = CONST_CAST(struct dp_packet *, packet);
            op->dop.execute.flow = upcall->flow;
            odp_key_to_dp_packet(upcall->key, upcall->key_len,
                                 op->dop.execute.packet);
            op->dop.execute.actions = upcall->odp_actions.data;
            op->dop.execute.actions_len = upcall->odp_actions.size;
            op->dop.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
            op->dop.execute.probe = false;
            op->dop.execute.mtu = upcall->mru;
            op->dop.execute.hash = upcall->hash;
            op->dop.execute.upcall_pid = upcall->pid;
        }
    }

    /* Execute batch. */
    n_opsp = 0;
    for (i = 0; i < n_ops; i++) {
        opsp[n_opsp++] = &ops[i].dop;
    }
    dpif_operate(udpif->dpif, opsp, n_opsp, DPIF_OFFLOAD_AUTO);

    /* Reflect the outcome of each flow put in its ukey's state. */
    for (i = 0; i < n_ops; i++) {
        struct udpif_key *ukey = ops[i].ukey;

        if (ukey) {
            ovs_mutex_lock(&ukey->mutex);
            if (ops[i].dop.error) {
                transition_ukey(ukey, UKEY_EVICTED);
            } else if (ukey->state < UKEY_OPERATIONAL) {
                transition_ukey(ukey, UKEY_OPERATIONAL);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }
    }
}

/* Returns the hash used to place a ukey identified by 'ufid' and 'pmd_id'.
 * Only the first 32-bit word of 'ufid' is mixed with 'pmd_id'. */
static uint32_t
get_ukey_hash(const ovs_u128 *ufid, const unsigned pmd_id)
{
    const uint32_t ufid_word = ufid->u32[0];

    return hash_2words(ufid_word, pmd_id);
}

/* Returns the ukey in 'udpif->ukeys' whose UFID equals '*ufid', or NULL if
 * there is none.  'pmd_id' contributes to the hash that selects the umap and
 * the hash chain, but entries are compared by UFID only. */
static struct udpif_key *
ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid, const unsigned pmd_id)
{
    const uint32_t hash = get_ukey_hash(ufid, pmd_id);
    struct cmap *bucket_map = &udpif->ukeys[hash % N_UMAPS].cmap;
    struct udpif_key *candidate;

    CMAP_FOR_EACH_WITH_HASH (candidate, cmap_node, hash, bucket_map) {
        if (ovs_u128_equals(candidate->ufid, *ufid)) {
            return candidate;
        }
    }

    return NULL;
}

/* Lockless accessor for the RCU protected 'ukey->actions': stores the
 * buffer's data pointer in '*actions' and its length in '*size'.  Callers
 * that hold 'ukey->mutex' may read the field directly instead. */
static void
ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size)
{
    const struct ofpbuf *snapshot;

    snapshot = ovsrcu_get(struct ofpbuf *, &ukey->actions);
    *size = snapshot->size;
    *actions = snapshot->data;
}

/* Replaces 'ukey->actions' with a clone of 'actions'.  The buffer that was
 * previously published, if any, is released through ovsrcu_postpone(). */
static void
ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions)
{
    struct ofpbuf *previous;

    previous = ovsrcu_get_protected(struct ofpbuf *, &ukey->actions);
    if (previous != NULL) {
        ovsrcu_postpone(ofpbuf_delete, previous);
    }

    ovsrcu_set(&ukey->actions, ofpbuf_clone(actions));
}

/* Allocates a ukey in UKEY_CREATED state and returns it.
 *
 * 'key' and 'mask' are copied into the ukey's own buffers and 'actions' is
 * cloned.  When 'xout' is nonnull, its recirculation references are swapped
 * into the new ukey, which thereby takes ownership of them. */
static struct udpif_key *
ukey_create__(const struct nlattr *key, size_t key_len,
              const struct nlattr *mask, size_t mask_len,
              bool ufid_present, const ovs_u128 *ufid,
              const unsigned pmd_id, const struct ofpbuf *actions,
              uint64_t reval_seq, long long int used,
              uint32_t key_recirc_id, struct xlate_out *xout)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct udpif_key *new_ukey = xmalloc(sizeof *new_ukey);
    long long int now;

    /* Private copies of the flow key and mask. */
    memcpy(&new_ukey->keybuf, key, key_len);
    new_ukey->key = &new_ukey->keybuf.nla;
    new_ukey->key_len = key_len;
    memcpy(&new_ukey->maskbuf, mask, mask_len);
    new_ukey->mask = &new_ukey->maskbuf.nla;
    new_ukey->mask_len = mask_len;

    /* Identity. */
    new_ukey->ufid_present = ufid_present;
    new_ukey->ufid = *ufid;
    new_ukey->pmd_id = pmd_id;
    new_ukey->hash = get_ukey_hash(ufid, pmd_id);

    /* The RCU pointer has to be initialized before the first set. */
    ovsrcu_init(&new_ukey->actions, NULL);
    ukey_set_actions(new_ukey, actions);

    ovs_mutex_init(&new_ukey->mutex);
    new_ukey->dump_seq = 0;     /* Not yet dumped */
    new_ukey->reval_seq = reval_seq;
    new_ukey->state = UKEY_CREATED;
    new_ukey->state_thread = ovsthread_id_self();
    new_ukey->state_where = OVS_SOURCE_LOCATOR;
    now = time_msec();
    new_ukey->flow_time = now;
    new_ukey->created = now;
    new_ukey->missed_dumps = 0;
    memset(&new_ukey->stats, 0, sizeof new_ukey->stats);
    new_ukey->stats.used = used;
    new_ukey->dp_layer = NULL;
    new_ukey->xcache = NULL;

    new_ukey->offloaded = false;
    new_ukey->in_netdev = NULL;
    new_ukey->flow_backlog_packets = 0;
    new_ukey->flow_packets = 0;

    new_ukey->key_recirc_id = key_recirc_id;
    recirc_refs_init(&new_ukey->recircs);
    if (xout != NULL) {
        /* Take ownership of the action recirc id references. */
        recirc_refs_swap(&new_ukey->recircs, &xout->recircs);
    }

    return new_ukey;
}

static struct udpif_key *
ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc)
{
    struct odputil_keybuf keystub, maskstub;
    struct ofpbuf keybuf, maskbuf;
    bool megaflow;
    struct odp_flow_key_parms odp_parms = {
        .flow = upcall->flow,
        .mask = wc ? &wc->masks : NULL,
    };

    odp_parms.support = upcall->ofproto->backer->rt_support.odp;
    if (upcall->key_len) {
        ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len);
    } else {
        /* dpif-netdev doesn't provide a netlink-formatted flow key in the
         * upcall, so convert the upcall's flow here. */
        ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub);
        odp_flow_key_from_flow(&odp_parms, &keybuf);
    }

    atomic_read_relaxed(&enable_megaflows, &megaflow);
    ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub);
    if (megaflow && wc) {
        odp_parms.key_buf = &keybuf;
        odp_flow_key_from_mask(&odp_parms, &maskbuf);
    }

    return ukey_create__(keybuf.data, keybuf.size, maskbuf.data, maskbuf.size,
                         true, upcall->ufid, upcall->pmd_id,
                         &upcall->put_actions, upcall->reval_seq, 0,
                         upcall->have_recirc_ref ? upcall->recirc->id : 0,
                         &upcall->xout);
}

static int
ukey_create_from_dpif_flow(const struct udpif *udpif,
                           const struct dpif_flow *flow,
                           struct udpif_key **ukey)
{
    struct dpif_flow full_flow;
    struct ofpbuf actions;
    uint64_t reval_seq;
    uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
    const struct nlattr *a;
    unsigned int left;

    if (!flow->key_len || !flow->actions_len) {
        struct ofpbuf buf;
        int err;

        /* If the key or actions were not provided by the datapath, fetch the
         * full flow. */
        ofpbuf_use_stack(&buf, &stub, sizeof stub);
        err = dpif_flow_get(udpif->dpif, flow->key, flow->key_len,
                            flow->ufid_present ? &flow->ufid : NULL,
                            flow->pmd_id, &buf, &full_flow);
        if (err) {
            return err;
        }
        flow = &full_flow;
    }

    /* Check the flow actions for recirculation action.  As recirculation
     * relies on OVS userspace internal state, we need to delete all old
     * datapath flows with either a non-zero recirc_id in the key, or any
     * recirculation actions upon OVS restart. */
    NL_ATTR_FOR_EACH (a, left, flow->key, flow->key_len) {
        if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
            && nl_attr_get_u32(a) != 0) {
            return EINVAL;
        }
    }
    NL_ATTR_FOR_EACH (a, left, flow->actions, flow->actions_len) {
        if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
            return EINVAL;
        }
    }

    reval_seq = seq_read(udpif->reval_seq) - 1; /* Ensure revalidation. */
    ofpbuf_use_const(&actions, flow->actions, flow->actions_len);
    *ukey = ukey_create__(flow->key, flow->key_len,
                          flow->mask, flow->mask_len, flow->ufid_present,
                          &flow->ufid, flow->pmd_id, &actions,
                          reval_seq, flow->stats.used, 0, NULL);

    return 0;
}

/* Tries to substitute 'new_ukey' for 'old_ukey' in 'umap'.
 *
 * The substitution only happens when 'old_ukey->mutex' can be taken without
 * blocking and 'old_ukey' is in UKEY_EVICTED state.  In that case 'new_ukey'
 * is left locked and in UKEY_VISIBLE state, 'old_ukey' moves to UKEY_DELETED
 * with its destruction deferred through ovsrcu_postpone(), and true is
 * returned.
 *
 * Otherwise returns false without taking 'new_ukey->mutex' and bumps a
 * coverage counter recording why (duplicate upcall or lock contention). */
static bool
try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
                 struct udpif_key *new_ukey)
    OVS_REQUIRES(umap->mutex)
    OVS_TRY_LOCK(true, new_ukey->mutex)
{
    bool replaced = false;

    /* The branch is entered when ovs_mutex_trylock() returns zero. */
    if (!ovs_mutex_trylock(&old_ukey->mutex)) {
        if (old_ukey->state == UKEY_EVICTED) {
            /* The flow was deleted during the current revalidator dump,
             * but its ukey won't be fully cleaned up until the sweep phase.
             * In the mean time, we are receiving upcalls for this traffic.
             * Expedite the (new) flow install by replacing the ukey. */
            ovs_mutex_lock(&new_ukey->mutex);
            cmap_replace(&umap->cmap, &old_ukey->cmap_node,
                         &new_ukey->cmap_node, new_ukey->hash);
            /* Carry the old dump sequence number over to the replacement. */
            new_ukey->dump_seq = old_ukey->dump_seq;
            ovsrcu_postpone(ukey_delete__, old_ukey);
            transition_ukey(old_ukey, UKEY_DELETED);
            transition_ukey(new_ukey, UKEY_VISIBLE);
            replaced = true;
            COVERAGE_INC(upcall_ukey_replace);
        } else {
            /* 'old_ukey' has not been evicted, so it stays in place. */
            COVERAGE_INC(handler_duplicate_upcall);
        }
        ovs_mutex_unlock(&old_ukey->mutex);
    } else {
        /* 'old_ukey->mutex' could not be taken; give up rather than block. */
        COVERAGE_INC(ukey_replace_contention);
    }

    return replaced;
}

/* Tries to publish 'new_ukey' in the shared ukey maps of 'udpif'.
 *
 * Returns true if 'new_ukey' is now in its umap, in which case it is handed
 * back with 'new_ukey->mutex' held.  Returns false, without holding that
 * mutex, if a ukey with the same UFID is already present and could not be
 * replaced. */
static bool
ukey_install__(struct udpif *udpif, struct udpif_key *new_ukey)
    OVS_TRY_LOCK(true, new_ukey->mutex)
{
    struct udpif_key *existing;
    struct umap *umap;
    bool locked = false;

    umap = &udpif->ukeys[new_ukey->hash % N_UMAPS];
    ovs_mutex_lock(&umap->mutex);

    existing = ukey_lookup(udpif, &new_ukey->ufid, new_ukey->pmd_id);
    if (!existing) {
        /* Common case: nothing is installed under this UFID yet. */
        ovs_mutex_lock(&new_ukey->mutex);
        cmap_insert(&umap->cmap, &new_ukey->cmap_node, new_ukey->hash);
        transition_ukey(new_ukey, UKEY_VISIBLE);
        locked = true;
    } else if (existing->key_len == new_ukey->key_len
               && memcmp(existing->key, new_ukey->key,
                         new_ukey->key_len) == 0) {
        /* Uncommon case: A ukey is already installed with the same UFID
         * and the same flow key. */
        locked = try_ukey_replace(umap, existing, new_ukey);
    } else {
        /* Same UFID but a different flow key: report both flows. */
        struct ds report = DS_EMPTY_INITIALIZER;

        odp_format_ufid(&existing->ufid, &report);
        ds_put_cstr(&report, " ");
        odp_flow_key_format(existing->key, existing->key_len, &report);
        ds_put_cstr(&report, "\n");
        odp_format_ufid(&new_ukey->ufid, &report);
        ds_put_cstr(&report, " ");
        odp_flow_key_format(new_ukey->key, new_ukey->key_len, &report);

        VLOG_WARN_RL(&rl, "Conflicting ukey for flows:\n%s",
                     ds_cstr(&report));
        ds_destroy(&report);
    }

    ovs_mutex_unlock(&umap->mutex);

    return locked;
}

/* Moves 'ukey' to state 'dst' and records the calling thread and 'where' as
 * the origin of the transition.
 *
 * A transition to a lower-numbered state aborts the process, reporting the
 * thread and location stored by the previous transition.  Re-entering
 * UKEY_OPERATIONAL while already in that state is a silent no-op.  Any other
 * transition that is not accepted by the check below is logged (rate
 * limited) and leaves 'ukey->state' unchanged, although the recorded thread
 * and location are still updated. */
static void
transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
                   const char *where)
    OVS_REQUIRES(ukey->mutex)
{
    /* States only ever advance. */
    if (dst < ukey->state) {
        VLOG_ABORT("Invalid ukey transition %d->%d (last transitioned from "
                   "thread %u at %s)", ukey->state, dst, ukey->state_thread,
                   ukey->state_where);
    }
    /* Returning here keeps the previously recorded thread and location. */
    if (ukey->state == dst && dst == UKEY_OPERATIONAL) {
        return;
    }

    /* Valid state transitions:
     * UKEY_CREATED -> UKEY_VISIBLE
     *  Ukey is now visible in the umap.
     * UKEY_VISIBLE -> UKEY_OPERATIONAL
     *  A handler has installed the flow, and the flow is in the datapath.
     * UKEY_VISIBLE -> UKEY_EVICTING
     *  A handler installs the flow, then revalidator sweeps the ukey before
     *  the flow is dumped. Most likely the flow was installed; start trying
     *  to delete it.
     * UKEY_VISIBLE -> UKEY_EVICTED
     *  A handler attempts to install the flow, but the datapath rejects it.
     *  Consider that the datapath has already destroyed it.
     * UKEY_OPERATIONAL -> UKEY_INCONSISTENT
     *  A revalidator modifies the flow with error returns.
     * UKEY_INCONSISTENT -> UKEY_EVICTING
     *  A revalidator decides to evict the datapath flow.
     * UKEY_OPERATIONAL -> UKEY_EVICTING
     *  A revalidator decides to evict the datapath flow.
     * UKEY_EVICTING    -> UKEY_EVICTED
     *  A revalidator has evicted the datapath flow.
     * UKEY_EVICTED     -> UKEY_DELETED
     *  A revalidator has removed the ukey from the umap and is deleting it.
     */
    /* Accepted: a step to the next state, any state below UKEY_DELETED when
     * leaving UKEY_VISIBLE, or UKEY_OPERATIONAL -> UKEY_EVICTING. */
    if (ukey->state == dst - 1 ||
       (ukey->state == UKEY_VISIBLE && dst < UKEY_DELETED) ||
       (ukey->state == UKEY_OPERATIONAL && dst == UKEY_EVICTING)) {
        ukey->state = dst;
    } else {
        struct ds ds = DS_EMPTY_INITIALIZER;

        odp_format_ufid(&ukey->ufid, &ds);
        VLOG_WARN_RL(&rl, "Invalid state transition for ukey %s: %d -> %d",
                     ds_cstr(&ds), ukey->state, dst);
        ds_destroy(&ds);
    }
    /* Remember who asked for this transition; reported by the abort above. */
    ukey->state_thread = ovsthread_id_self();
    ukey->state_where = where;
}

/* Tries to publish 'ukey' in the ukey maps of 'udpif'.  Returns true on
 * success.  Unlike ukey_install__(), 'ukey->mutex' is not held on return. */
static bool
ukey_install(struct udpif *udpif, struct udpif_key *ukey)
{
    if (!ukey_install__(udpif, ukey)) {
        return false;
    }

    /* ukey_install__() hands the ukey back locked; release it. */
    ovs_mutex_unlock(&ukey->mutex);
    return true;
}

/* Searches for a ukey in 'udpif->ukeys' that matches 'flow' and attempts to
 * lock the ukey. If the ukey does not exist, create it.
 *
 * Returns 0 on success, setting *result to the matching ukey and returning it
 * in a locked state. Otherwise, returns an errno and clears *result. EBUSY
 * indicates that another thread is handling this flow. Other errors indicate
 * an unexpected condition creating a new ukey.
 *
 * *error is an output parameter provided to appease the threadsafety analyser,
 * and its value matches the return value. */
static int
ukey_acquire(struct udpif *udpif, const struct dpif_flow *flow,
             struct udpif_key **result, int *error)
    OVS_TRY_LOCK(0, (*result)->mutex)
{
    struct udpif_key *ukey;
    int retval;

    ukey = ukey_lookup(udpif, &flow->ufid, flow->pmd_id);
    if (ukey) {
        /* Existing ukey: never block on its mutex; the trylock result is
         * reported to the caller as is. */
        retval = ovs_mutex_trylock(&ukey->mutex);
    } else {
        /* Usually we try to avoid installing flows from revalidator threads,
         * because locking on a umap may cause handler threads to block.
         * However there are certain cases, like when ovs-vswitchd is
         * restarted, where it is desirable to handle flows that exist in the
         * datapath gracefully (ie, don't just clear the datapath). */
        bool install;

        retval = ukey_create_from_dpif_flow(udpif, flow, &ukey);
        if (retval) {
            goto done;
        }
        /* On success ukey_install__() returns with 'ukey->mutex' held. */
        install = ukey_install__(udpif, ukey);
        if (install) {
            retval = 0;
        } else {
            /* The new ukey was never inserted into the umap, so it is freed
             * right away rather than through ovsrcu_postpone(). */
            ukey_delete__(ukey);
            retval = EBUSY;
        }
    }

done:
    /* Keep '*error', '*result' and the return value consistent. */
    *error = retval;
    if (retval) {
        *result = NULL;
    } else {
        *result = ukey;
    }
    return retval;
}

/* Frees 'ukey' together with the resources it holds: the recirculation id
 * of its key (when nonzero), its recirculation references, its xlate cache,
 * its actions buffer and its mutex.  A null 'ukey' is ignored. */
static void
ukey_delete__(struct udpif_key *ukey)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct ofpbuf *actions;

    if (!ukey) {
        return;
    }

    if (ukey->key_recirc_id != 0) {
        recirc_free_id(ukey->key_recirc_id);
    }
    recirc_refs_unref(&ukey->recircs);
    xlate_cache_delete(ukey->xcache);

    actions = ovsrcu_get(struct ofpbuf *, &ukey->actions);
    ofpbuf_delete(actions);

    ovs_mutex_destroy(&ukey->mutex);
    free(ukey);
}

/* Removes 'ukey' from 'umap' and schedules it for destruction through
 * ovsrcu_postpone(), unless it has already reached UKEY_DELETED. */
static void
ukey_delete(struct umap *umap, struct udpif_key *ukey)
    OVS_REQUIRES(umap->mutex)
{
    ovs_mutex_lock(&ukey->mutex);
    if (ukey->state >= UKEY_DELETED) {
        /* Already deleted; nothing left to do. */
        ovs_mutex_unlock(&ukey->mutex);
        return;
    }

    cmap_remove(&umap->cmap, &ukey->cmap_node, ukey->hash);
    ovsrcu_postpone(ukey_delete__, ukey);
    transition_ukey(ukey, UKEY_DELETED);
    ovs_mutex_unlock(&ukey->mutex);
}

/* Decides whether 'ukey' deserves a full revalidation.  'packets' is the
 * number of packets the flow has seen since its stats were last recorded.
 *
 * Returns false when revalidating the flow is not considered worthwhile, in
 * which case the caller deletes it instead. */
static bool
should_revalidate(const struct udpif *udpif, const struct udpif_key *ukey,
                  uint64_t packets)
    OVS_REQUIRES(ukey->mutex)
{
    const long long int last_used = ukey->stats.used;
    long long int now_ms, idle_ms, ms_per_packet;

    /* Always revalidate when the min-revalidate-pps threshold is disabled,
     * the first time a flow is dumped, or when the last dump was quick
     * enough that we are likely to handle full revalidation for the
     * flows. */
    if (!ofproto_min_revalidate_pps
        || !last_used
        || udpif->dump_duration < ofproto_max_revalidator / 2) {
        return true;
    }

    /* Calculate the mean time between seeing these packets. If this
     * exceeds the threshold, then delete the flow rather than performing
     * costly revalidation for flows that aren't being hit frequently.
     *
     * This is targeted at situations where the dump_duration is high (~1s),
     * and revalidation is triggered by a call to udpif_revalidate(). In
     * these situations, revalidation of all flows causes fluctuations in the
     * flow_limit due to the interaction with the dump_duration and max_idle.
     * This tends to result in deletion of low-throughput flows anyway, so
     * skip the revalidation and just delete those flows. */
    packets = MAX(packets, 1);
    now_ms = MAX(last_used, time_msec());
    idle_ms = now_ms - last_used;
    ms_per_packet = idle_ms / packets;

    if (ms_per_packet < 1000 / ofproto_min_revalidate_pps) {
        /* The flow is receiving more than min-revalidate-pps, so keep it. */
        return true;
    }

    /* A hardware offloaded flow might take up to X seconds to update its
     * statistics.  Until we are sure the statistics had a chance to be
     * updated, also keep it. */
    return ukey->offloaded && idle_ms < ofproto_offloaded_stats_delay;
}

/* Inputs and outputs of one translation performed through xlate_key(). */
struct reval_context {
    /* Optional output parameters */
    struct flow_wildcards *wc;      /* Handed to xlate_in_init(). */
    struct ofpbuf *odp_actions;     /* Handed to xlate_in_init(). */
    struct netflow **netflow;       /* Handed to xlate_lookup(). */
    struct xlate_cache *xcache;     /* Assigned to the xlate_in's 'xcache'. */

    /* Required output parameters */
    struct xlate_out xout;          /* Filled in by xlate_actions(). */
    struct flow flow;               /* Filled in by odp_flow_key_to_flow(). */
};

/* Translates 'key' into a flow, populating 'ctx' as it goes along.
 *
 * Returns 0 on success, otherwise a positive errno value.
 *
 * The caller is responsible for uninitializing ctx->xout on success.
 */
static int
xlate_key(struct udpif *udpif, const struct nlattr *key, unsigned int len,
          const struct dpif_flow_stats *push, struct reval_context *ctx)
{
    struct ofproto_dpif *ofproto;
    ofp_port_t ofp_in_port;
    enum odp_key_fitness fitness;
    struct xlate_in xin;
    int error;

    fitness = odp_flow_key_to_flow(key, len, &ctx->flow, NULL);
    if (fitness == ODP_FIT_ERROR) {
        return EINVAL;
    }

    error = xlate_lookup(udpif->backer, &ctx->flow, &ofproto, NULL, NULL,
                         ctx->netflow, &ofp_in_port, NULL);
    if (error) {
        return error;
    }

    xlate_in_init(&xin, ofproto, ofproto_dpif_get_tables_version(ofproto),
                  &ctx->flow, ofp_in_port, NULL, push->tcp_flags,
                  NULL, ctx->wc, ctx->odp_actions);
    if (push->n_packets) {
        xin.resubmit_stats = push;
        xin.allow_side_effects = true;
    }
    xin.xcache = ctx->xcache;
    xlate_actions(&xin, &ctx->xout);
    if (fitness == ODP_FIT_TOO_LITTLE) {
        ctx->xout.slow |= SLOW_MATCH;
    }

    return 0;
}

static int
xlate_ukey(struct udpif *udpif, const struct udpif_key *ukey,
           uint16_t tcp_flags, struct reval_context *ctx)
{
    struct dpif_flow_stats push = {
        .tcp_flags = tcp_flags,
    };
    return xlate_key(udpif, ukey->key, ukey->key_len, &push, ctx);
}

/* Gives 'ukey', which must not have an xlate cache yet, a fresh one and
 * fills it by translating the ukey's flow.
 *
 * Returns 0 on success, otherwise the error from xlate_ukey().  The new
 * cache stays attached to 'ukey' in both cases. */
static int
populate_xcache(struct udpif *udpif, struct udpif_key *ukey,
                uint16_t tcp_flags)
    OVS_REQUIRES(ukey->mutex)
{
    /* None of the optional outputs are requested. */
    struct reval_context ctx = {
        .wc = NULL,
        .odp_actions = NULL,
        .netflow = NULL,
    };
    int error;

    ovs_assert(!ukey->xcache);
    ctx.xcache = xlate_cache_new();
    ukey->xcache = ctx.xcache;

    error = xlate_ukey(udpif, ukey, tcp_flags, &ctx);
    if (!error) {
        xlate_out_uninit(&ctx.xout);
    }

    return error;
}

/* Re-translates the flow of 'ukey' and compares the outcome with the mask
 * and actions stored in the ukey.
 *
 * Returns:
 *      UKEY_KEEP   The stored mask and actions still match.
 *      UKEY_MODIFY Only the actions differ.  The new actions are left in
 *                  'odp_actions' and the recirculation references of the
 *                  new translation are swapped into 'recircs'.
 *      UKEY_DELETE Anything else; '*del_reason' is set to the cause.
 *
 * '*del_reason' is only written on the UKEY_DELETE paths. */
static enum reval_result
revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
                  uint16_t tcp_flags, struct ofpbuf *odp_actions,
                  struct recirc_refs *recircs, struct xlate_cache *xcache,
                  enum flow_del_reason *del_reason)
{
    struct xlate_out *xoutp;
    struct netflow *netflow;
    struct flow_wildcards dp_mask, wc;
    enum reval_result result;
    struct reval_context ctx = {
        .odp_actions = odp_actions,
        .netflow = &netflow,
        .xcache = xcache,
        .wc = &wc,
    };

    OVS_USDT_PROBE(revalidate_ukey__, entry, udpif, ukey, tcp_flags,
                   odp_actions, recircs, xcache);

    /* Pessimistic defaults: every early exit below means "delete". */
    result = UKEY_DELETE;
    xoutp = NULL;
    netflow = NULL;

    if (xlate_ukey(udpif, ukey, tcp_flags, &ctx)) {
        *del_reason = FDR_XLATION_ERROR;
        goto exit;
    }
    /* From here on 'ctx.xout' is initialized and gets uninitialized at
     * 'exit'. */
    xoutp = &ctx.xout;

    if (xoutp->avoid_caching) {
        *del_reason = FDR_AVOID_CACHING;
        goto exit;
    }

    if (xoutp->slow) {
        struct ofproto_dpif *ofproto;
        ofp_port_t ofp_in_port;

        ofproto = xlate_lookup_ofproto(udpif->backer, &ctx.flow, &ofp_in_port,
                                       NULL);

        /* Discard the translated actions; the slow path actions composed
         * below take their place. */
        ofpbuf_clear(odp_actions);

        if (!ofproto) {
            *del_reason = FDR_NO_OFPROTO;
            goto exit;
        }

        compose_slow_path(udpif, xoutp, ctx.flow.in_port.odp_port,
                          ofp_in_port, odp_actions,
                          ofproto->up.slowpath_meter_id, &ofproto->uuid);
    }

    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, &dp_mask, &ctx.flow,
                             NULL)
        == ODP_FIT_ERROR) {
        *del_reason = FDR_BAD_ODP_FIT;
        goto exit;
    }

    /* Do not modify if any bit is wildcarded by the installed datapath flow,
     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
     * tells that the datapath flow is now too generic and must be narrowed
     * down.  Note that we do not know if the datapath has ignored any of the
     * wildcarded bits, so we may be overly conservative here. */
    if (flow_wildcards_has_extra(&dp_mask, ctx.wc)) {
        *del_reason = FDR_FLOW_WILDCARDED;
        goto exit;
    }

    if (!ofpbuf_equal(odp_actions,
                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
        /* The datapath mask was OK, but the actions seem to have changed.
         * Let's modify it in place. */
        result = UKEY_MODIFY;
        /* Transfer recirc action ID references to the caller. */
        recirc_refs_swap(recircs, &xoutp->recircs);
        goto exit;
    }

    result = UKEY_KEEP;

exit:
    /* 'netflow' is nonnull only if the translation supplied one. */
    if (netflow && result == UKEY_DELETE) {
        netflow_flow_clear(netflow, &ctx.flow);
    }
    /* 'xoutp' is still NULL here if the translation itself failed. */
    xlate_out_uninit(xoutp);

    OVS_USDT_PROBE(revalidate_ukey__, exit, udpif, ukey, result);

    return result;
}

/* Emits a rate-limited warning that the packet counter in 'stats' does not
 * follow on from the one recorded in 'ukey', including the ukey's UFID, flow
 * key and actions. */
static void
log_unexpected_stats_jump(struct udpif_key *ukey,
                          const struct dpif_flow_stats *stats)
    OVS_REQUIRES(ukey->mutex)
{
    static struct vlog_rate_limit jump_rl = VLOG_RATE_LIMIT_INIT(1, 5);
    struct ds desc = DS_EMPTY_INITIALIZER;
    struct ofpbuf *installed;

    /* Describe the ukey as "<ufid>, <key>, actions:<actions>". */
    odp_format_ufid(&ukey->ufid, &desc);
    ds_put_cstr(&desc, ", ");
    odp_flow_key_format(ukey->key, ukey->key_len, &desc);
    ds_put_cstr(&desc, ", actions:");
    installed = ovsrcu_get(struct ofpbuf *, &ukey->actions);
    format_odp_actions(&desc, installed->data, installed->size, NULL);

    VLOG_WARN_RL(&jump_rl, "Unexpected jump in packet stats from %"PRIu64
                 " to %"PRIu64" when handling ukey %s",
                 ukey->stats.n_packets, stats->n_packets, ds_cstr(&desc));

    ds_destroy(&desc);
}

/* Checks that the datapath actions of 'ukey' are still correct, and pushes
 * the part of 'stats' that has accumulated since the ukey's stats were last
 * recorded.
 *
 * Returns a recommended action for 'ukey', options include:
 *      UKEY_DELETE The ukey should be deleted.
 *      UKEY_KEEP   The ukey is fine as is.
 *      UKEY_MODIFY The ukey's actions should be changed but is otherwise
 *                  fine.  Callers should change the actions to those found
 *                  in the caller supplied 'odp_actions' buffer.  The
 *                  recirculation references can be found in 'recircs' and
 *                  must be handled by the caller.
 *
 * If the result is UKEY_MODIFY, then references to all recirc_ids used by the
 * new flow will be held within 'recircs' (which may be none).
 *
 * The caller is responsible for both initializing 'recircs' prior this call,
 * and ensuring any references are eventually freed.
 */
static enum reval_result
revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
                const struct dpif_flow_stats *stats,
                struct ofpbuf *odp_actions, uint64_t reval_seq,
                struct recirc_refs *recircs, enum flow_del_reason *del_reason)
    OVS_REQUIRES(ukey->mutex)
{
    const bool seq_changed = ukey->reval_seq != reval_seq;
    enum reval_result verdict = UKEY_DELETE;
    struct dpif_flow_stats delta;

    ofpbuf_clear(odp_actions);

    delta.n_packets = stats->n_packets - ukey->stats.n_packets;
    delta.n_bytes = stats->n_bytes - ukey->stats.n_bytes;
    delta.tcp_flags = stats->tcp_flags;
    delta.used = stats->used;

    if (ukey->stats.n_packets < UINT64_THREE_QUARTERS
        && stats->n_packets < ukey->stats.n_packets) {
        /* Report cases where the packet counter is lower than the previous
         * instance, but exclude the potential wrapping of an uint64_t. */
        COVERAGE_INC(ukey_invalid_stat_reset);
        log_unexpected_stats_jump(ukey, stats);
    }

    if (!seq_changed) {
        /* No revalidation needed.  Keep the flow, unless there are stats to
         * push, no xcache to push them through, and building one fails. */
        if (!delta.n_packets || ukey->xcache
            || !populate_xcache(udpif, ukey, delta.tcp_flags)) {
            verdict = UKEY_KEEP;
        }
    } else if (!should_revalidate(udpif, ukey, delta.n_packets)) {
        /* Delete, since it is too expensive to revalidate. */
        *del_reason = FDR_TOO_EXPENSIVE;
    } else {
        /* Start from an empty xcache, reusing the existing one if any. */
        if (ukey->xcache) {
            xlate_cache_clear(ukey->xcache);
        } else {
            ukey->xcache = xlate_cache_new();
        }
        verdict = revalidate_ukey__(udpif, ukey, delta.tcp_flags,
                                    odp_actions, recircs, ukey->xcache,
                                    del_reason);
    }

    /* Stats for deleted flows will be attributed upon flow deletion. Skip. */
    if (verdict != UKEY_DELETE) {
        xlate_push_stats(ukey->xcache, &delta, ukey->offloaded);
        ukey->stats = *stats;
        ukey->reval_seq = reval_seq;
    }

    return verdict;
}

/* Sets up 'op' as a flow deletion for the datapath flow 'flow'.  The op is
 * not tied to any ukey; the stats of the deleted flow are to be stored in
 * 'op->stats'. */
static void
delete_op_init__(struct udpif *udpif, struct ukey_op *op,
                 const struct dpif_flow *flow)
{
    op->ukey = NULL;
    op->dop.type = DPIF_OP_FLOW_DEL;

    op->dop.flow_del.stats = &op->stats;
    op->dop.flow_del.terse = udpif_use_ufid(udpif);
    op->dop.flow_del.pmd_id = flow->pmd_id;
    if (flow->ufid_present) {
        op->dop.flow_del.ufid = &flow->ufid;
    } else {
        op->dop.flow_del.ufid = NULL;
    }
    op->dop.flow_del.key_len = flow->key_len;
    op->dop.flow_del.key = flow->key;
}

/* Sets up 'op' as a flow deletion for the datapath flow tracked by 'ukey'
 * and ties the op to that ukey.  The stats of the deleted flow are to be
 * stored in 'op->stats'. */
static void
delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey)
{
    op->ukey = ukey;
    op->dop.type = DPIF_OP_FLOW_DEL;

    op->dop.flow_del.stats = &op->stats;
    op->dop.flow_del.terse = udpif_use_ufid(udpif);
    op->dop.flow_del.pmd_id = ukey->pmd_id;
    if (ukey->ufid_present) {
        op->dop.flow_del.ufid = &ukey->ufid;
    } else {
        op->dop.flow_del.ufid = NULL;
    }
    op->dop.flow_del.key_len = ukey->key_len;
    op->dop.flow_del.key = ukey->key;
}

/* Sets up 'op' as a flow put, with the given 'flags', for the flow tracked
 * by 'ukey': its key, mask, UFID (when present), pmd_id and current
 * actions.  No stats are requested. */
static void
put_op_init(struct ukey_op *op, struct udpif_key *ukey,
            enum dpif_flow_put_flags flags)
{
    op->ukey = ukey;
    op->dop.type = DPIF_OP_FLOW_PUT;

    op->dop.flow_put.flags = flags;
    op->dop.flow_put.stats = NULL;
    op->dop.flow_put.pmd_id = ukey->pmd_id;
    if (ukey->ufid_present) {
        op->dop.flow_put.ufid = &ukey->ufid;
    } else {
        op->dop.flow_put.ufid = NULL;
    }

    op->dop.flow_put.key = ukey->key;
    op->dop.flow_put.key_len = ukey->key_len;
    op->dop.flow_put.mask = ukey->mask;
    op->dop.flow_put.mask_len = ukey->mask_len;

    /* The actions are read through the lockless RCU accessor. */
    ukey_get_actions(ukey, &op->dop.flow_put.actions,
                     &op->dop.flow_put.actions_len);
}

/* Executes datapath operations 'ops' and attributes stats retrieved from the
 * datapath as part of those operations.
 *
 * 'n_ops' must not exceed REVALIDATE_MAX_BATCH.  Ops that carry a ukey also
 * have that ukey's state updated according to the outcome. */
static void
push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
{
    struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
    size_t i;

    ovs_assert(n_ops <= REVALIDATE_MAX_BATCH);
    for (i = 0; i < n_ops; i++) {
        opsp[i] = &ops[i].dop;
    }
    dpif_operate(udpif->dpif, opsp, n_ops, DPIF_OFFLOAD_AUTO);

    for (i = 0; i < n_ops; i++) {
        struct ukey_op *op = &ops[i];

        /* Failed operation: record the outcome in the ukey, if there is one,
         * and push no stats. */
        if (op->dop.error) {
            if (op->ukey) {
                ovs_mutex_lock(&op->ukey->mutex);
                if (op->dop.type == DPIF_OP_FLOW_DEL) {
                    transition_ukey(op->ukey, UKEY_EVICTED);
                } else {
                    /* Modification of the flow failed. */
                    transition_ukey(op->ukey, UKEY_INCONSISTENT);
                }
                ovs_mutex_unlock(&op->ukey->mutex);
            }
            continue;
        }

        if (op->dop.type != DPIF_OP_FLOW_DEL) {
            /* Only deleted flows need their stats pushed. */
            continue;
        }

        struct dpif_flow_stats *push, *stats, push_buf;

        stats = op->dop.flow_del.stats;
        push = &push_buf;

        if (op->ukey) {
            /* Push only what accumulated since the ukey's stats were last
             * recorded. */
            ovs_mutex_lock(&op->ukey->mutex);
            transition_ukey(op->ukey, UKEY_EVICTED);
            push->used = MAX(stats->used, op->ukey->stats.used);
            push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
            push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
            push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;

            if (stats->n_packets < op->ukey->stats.n_packets &&
                op->ukey->stats.n_packets < UINT64_THREE_QUARTERS) {
                /* Report cases where the packet counter is lower than the
                 * previous instance, but exclude the potential wrapping of an
                 * uint64_t. */
                COVERAGE_INC(ukey_invalid_stat_reset);
            }

            ovs_mutex_unlock(&op->ukey->mutex);
        } else {
            /* No ukey to diff against: push the datapath stats as they
             * are. */
            push = stats;
        }

        if (push->n_packets || netflow_exists()) {
            const struct nlattr *key = op->dop.flow_del.key;
            size_t key_len = op->dop.flow_del.key_len;
            struct netflow *netflow;
            struct reval_context ctx = {
                .netflow = &netflow,
            };
            int error;

            if (op->ukey) {
                ovs_mutex_lock(&op->ukey->mutex);
                if (op->ukey->xcache) {
                    /* Push through the xcache; no translation is needed. */
                    xlate_push_stats(op->ukey->xcache, push, false);
                    ovs_mutex_unlock(&op->ukey->mutex);
                    continue;
                }
                ovs_mutex_unlock(&op->ukey->mutex);
                /* Use the key stored in the ukey rather than the one in the
                 * op. */
                key = op->ukey->key;
                key_len = op->ukey->key_len;
            }

            /* No xcache available: translate the key, passing 'push' as the
             * stats. */
            error = xlate_key(udpif, key, key_len, push, &ctx);
            if (error) {
                static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
                VLOG_WARN_RL(&rll, "xlate_key failed (%s)!",
                             ovs_strerror(error));
            } else {
                xlate_out_uninit(&ctx.xout);
                if (netflow) {
                    netflow_flow_clear(netflow, &ctx.flow);
                }
            }
        }
    }
}

/* Executes datapath operations 'ops', attributes stats retrieved from the
 * datapath, and deletes ukeys corresponding to deleted flows.
 *
 * Every DPIF_OP_FLOW_DEL operation in 'ops' must carry a nonnull ukey that
 * lives in 'umap', because ukey_delete() locks that ukey and removes it from
 * 'umap'.  This holds for deletions whether or not they succeeded in the
 * datapath. */
static void
push_ukey_ops(struct udpif *udpif, struct umap *umap,
              struct ukey_op *ops, size_t n_ops)
{
    /* Same type as 'n_ops', to avoid a signed/unsigned comparison. */
    size_t i;

    push_dp_ops(udpif, ops, n_ops);

    /* Take the umap lock once for the whole batch. */
    ovs_mutex_lock(&umap->mutex);
    for (i = 0; i < n_ops; i++) {
        if (ops[i].dop.type == DPIF_OP_FLOW_DEL) {
            ukey_delete(umap, ops[i].ukey);
        }
    }
    ovs_mutex_unlock(&umap->mutex);
}

static void
log_unexpected_flow(const struct dpif_flow *flow, int error)
{
    struct ds ds = DS_EMPTY_INITIALIZER;

    ds_put_format(&ds, "Failed to acquire udpif_key corresponding to "
                  "unexpected flow (%s): ", ovs_strerror(error));
    odp_format_ufid(&flow->ufid, &ds);

    static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(10, 60);
    VLOG_WARN_RL(&rll, "%s", ds_cstr(&ds));

    ds_destroy(&ds);
}

/* Initializes 'op' with the datapath operation that carries out 'result' for
 * 'ukey':
 *
 *     - UKEY_DELETE: a flow deletion; 'ukey' is moved to UKEY_EVICTING.
 *
 *     - UKEY_MODIFY: a DPIF_FP_MODIFY put.  'recircs' is swapped with the
 *       ukey's own set and the old set (now in 'recircs') is unreferenced;
 *       'odp_actions' become the ukey's actions.
 *
 * For any other 'result' neither 'op' nor 'ukey' is touched. */
static void
reval_op_init(struct ukey_op *op, enum reval_result result,
              struct udpif *udpif, struct udpif_key *ukey,
              struct recirc_refs *recircs, struct ofpbuf *odp_actions)
    OVS_REQUIRES(ukey->mutex)
{
    if (result == UKEY_DELETE) {
        delete_op_init(udpif, op, ukey);
        transition_ukey(ukey, UKEY_EVICTING);
        return;
    }

    if (result != UKEY_MODIFY) {
        return;
    }

    /* Hand the new recirculation references to the ukey and drop the ones it
     * held before.  ukey->key_recirc_id stays as it is because the key itself
     * does not change. */
    recirc_refs_swap(&ukey->recircs, recircs);
    recirc_refs_unref(recircs);

    ukey_set_actions(ukey, odp_actions);
    put_op_init(op, ukey, DPIF_FP_MODIFY);
}

/* Closes 'ukey->in_netdev', if one is set, and resets the field to NULL. */
static void
ukey_netdev_unref(struct udpif_key *ukey)
{
    if (ukey->in_netdev) {
        netdev_close(ukey->in_netdev);
        ukey->in_netdev = NULL;
    }
}

/*
 * Determines the input netdev of 'ukey' from the attributes stored in
 * 'ukey->key' and saves it in 'ukey->in_netdev'.  Any netdev previously
 * stored there is released first.
 *
 * An OVS_KEY_ATTR_IN_PORT attribute is resolved with netdev_ports_get() for
 * the datapath type of 'udpif'.  An OVS_KEY_ATTR_TUNNEL attribute discards
 * whatever was found so far; if it parses without ODP_FIT_ERROR, the netdev
 * returned by flow_get_tunnel_netdev() is stored and the scan stops.
 */
static void
ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey)
{
    const char *dp_type = dpif_normalize_type(dpif_type(udpif->dpif));
    const struct nlattr *attr;
    unsigned int left;

    /* Start from a clean slate. */
    ukey_netdev_unref(ukey);

    NL_ATTR_FOR_EACH (attr, left, ukey->key, ukey->key_len) {
        enum ovs_key_attr type = nl_attr_type(attr);
        struct flow_tnl tnl;

        if (type == OVS_KEY_ATTR_IN_PORT) {
            ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(attr),
                                               dp_type);
            continue;
        }
        if (type != OVS_KEY_ATTR_TUNNEL) {
            continue;
        }

        /* A tunnel attribute takes over from any in-port netdev. */
        ukey_netdev_unref(ukey);
        if (odp_tun_key_from_attr(attr, &tnl, NULL) != ODP_FIT_ERROR) {
            ukey->in_netdev = flow_get_tunnel_netdev(&tnl);
            break;
        }
    }
}

/* Returns the datapath packet count in 'f' plus the backlog saved in 'ukey',
 * minus the packet count last recorded in 'ukey->flow_packets'. */
static uint64_t
udpif_flow_packet_delta(struct udpif_key *ukey, const struct dpif_flow *f)
{
    uint64_t total = f->stats.n_packets + ukey->flow_backlog_packets;

    return total - ukey->flow_packets;
}

static long long int
udpif_flow_time_delta(struct udpif *udpif, struct udpif_key *ukey)
{
    return (udpif->dpif->current_ms - ukey->flow_time) / 1000;
}

/*
 * Save backlog packet count while switching modes
 * between offloaded and kernel datapaths.
 *
 * Copies 'ukey->flow_packets' into 'ukey->flow_backlog_packets'.
 */
static void
udpif_set_ukey_backlog_packets(struct udpif_key *ukey)
{
    ukey->flow_backlog_packets = ukey->flow_packets;
}

/* Recomputes the packets-per-second rate of 'ukey' from the datapath stats in
 * 'f' and stores it in 'ukey->flow_pps_rate', together with the refreshed
 * offloaded flag, packet count and timestamp.
 *
 * Nothing is updated until at least OFFL_REBAL_INTVL_MSEC have passed since
 * 'ukey->flow_time'.
 *
 * NOTE(review): the division relies on that interval being at least one
 * second, so that udpif_flow_time_delta() is nonzero -- confirm against the
 * definition of OFFL_REBAL_INTVL_MSEC. */
static void
udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey,
                      const struct dpif_flow *f)
{
    bool interval_elapsed = udpif->dpif->current_ms - ukey->flow_time
                            >= OFFL_REBAL_INTVL_MSEC;

    if (interval_elapsed) {
        ukey->offloaded = f->attrs.offloaded;
        ukey->flow_pps_rate = udpif_flow_packet_delta(ukey, f)
                              / udpif_flow_time_delta(udpif, ukey);
        ukey->flow_packets = ukey->flow_backlog_packets + f->stats.n_packets;
        ukey->flow_time = udpif->dpif->current_ms;
    }
}

/* Returns a 'used' timestamp for a flow whose datapath stats do not carry
 * one.
 *
 * When the current dump is not terse, returns 'ukey->created' and leaves
 * 'stats' alone.  Otherwise stores the result in 'stats->used' and returns it:
 *
 *     - the datapath's 'current_ms' if 'stats' shows more packets than the
 *       ukey has recorded;
 *
 *     - else the ukey's previous 'used' time, if nonzero;
 *
 *     - else 'ukey->created'. */
static long long int
udpif_update_used(struct udpif *udpif, struct udpif_key *ukey,
                  struct dpif_flow_stats *stats)
    OVS_REQUIRES(ukey->mutex)
{
    if (!udpif->dump->terse) {
        return ukey->created;
    }

    if (stats->n_packets > ukey->stats.n_packets) {
        stats->used = udpif->dpif->current_ms;
    } else {
        stats->used = ukey->stats.used ? ukey->stats.used : ukey->created;
    }
    return stats->used;
}

/* Runs one revalidation pass over the current flow dump for 'revalidator'.
 *
 * Datapath flows are pulled from 'udpif->dump' in batches of at most
 * REVALIDATE_MAX_BATCH.  For each dumped flow the matching ukey is acquired
 * and the flow is either kept, modified or deleted; the resulting datapath
 * operations are queued in 'ops' and pushed once per batch.  At most one
 * operation is queued per dumped flow, so 'ops' cannot overflow.
 *
 * ukeys themselves are not deleted here; that is left to the sweep phase. */
static void
revalidate(struct revalidator *revalidator)
{
    uint64_t odp_actions_stub[1024 / 8];
    struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);

    struct udpif *udpif = revalidator->udpif;
    struct dpif_flow_dump_thread *dump_thread;
    uint64_t dump_seq, reval_seq;
    bool kill_warn_print = true;
    unsigned int flow_limit;

    dump_seq = seq_read(udpif->dump_seq);
    reval_seq = seq_read(udpif->reval_seq);
    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
    dump_thread = dpif_flow_dump_thread_create(udpif->dump);
    for (;;) {
        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        int n_ops = 0;

        struct dpif_flow flows[REVALIDATE_MAX_BATCH];
        const struct dpif_flow *f;
        int n_dumped;

        long long int max_idle;
        long long int now;
        size_t kill_all_limit;
        size_t n_dp_flows;
        bool kill_them_all;

        n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
        if (!n_dumped) {
            /* The dump is exhausted. */
            break;
        }

        /* In normal operation we want to keep flows around until they have
         * been idle for 'ofproto_max_idle' milliseconds.  However:
         *
         *     - If the number of datapath flows climbs above 'flow_limit',
         *       drop that down to 100 ms to try to bring the flows down to
         *       the limit.
         *
         *     - If the number of datapath flows climbs above twice
         *       'flow_limit', delete all the datapath flows as an emergency
         *       measure.  (We reassess this condition for the next batch of
         *       datapath flows, so we will recover before all the flows are
         *       gone.) */
        n_dp_flows = udpif_get_n_flows(udpif);
        if (n_dp_flows >= flow_limit) {
            COVERAGE_INC(upcall_flow_limit_hit);
        }

        kill_them_all = false;
        kill_all_limit = flow_limit * 2;
        if (OVS_UNLIKELY(n_dp_flows > kill_all_limit)) {
            static struct vlog_rate_limit rlem = VLOG_RATE_LIMIT_INIT(1, 1);

            kill_them_all = true;
            COVERAGE_INC(upcall_flow_limit_kill);
            /* 'kill_warn_print' limits the warning to once per call. */
            if (kill_warn_print) {
                kill_warn_print = false;
                VLOG_WARN_RL(&rlem,
                    "Number of datapath flows (%"PRIuSIZE") twice as high as "
                    "current dynamic flow limit (%"PRIuSIZE").  "
                    "Starting to delete flows unconditionally "
                    "as an emergency measure.", n_dp_flows, kill_all_limit);
            }
        }

        max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;

        udpif->dpif->current_ms = now = time_msec();
        for (f = flows; f < &flows[n_dumped]; f++) {
            long long int used = f->stats.used;
            struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
            enum flow_del_reason del_reason = FDR_NONE;
            struct dpif_flow_stats stats = f->stats;
            enum reval_result result;
            struct udpif_key *ukey;
            bool already_dumped;
            int error;

            /* A nonzero return means no ukey is available for this flow.
             * Every path after a successful acquire unlocks 'ukey->mutex'
             * before moving on. */
            if (ukey_acquire(udpif, f, &ukey, &error)) {
                if (error == EBUSY) {
                    /* Another thread is processing this flow, so don't bother
                     * processing it. */
                    COVERAGE_INC(upcall_ukey_contention);
                } else {
                    log_unexpected_flow(f, error);
                    if (error != ENOENT) {
                        delete_op_init__(udpif, &ops[n_ops++], f);
                    }
                }
                continue;
            }

            ukey->offloaded = f->attrs.offloaded;
            if (!ukey->dp_layer
                || (!dpif_synced_dp_layers(udpif->dpif)
                    && strcmp(ukey->dp_layer, f->attrs.dp_layer))) {

                if (ukey->dp_layer) {
                    /* The dp_layer has changed this is probably due to an
                     * earlier revalidate cycle moving it to/from hw offload.
                     * In this case we should reset the ukey stored statistics,
                     * as they are from the deleted DP flow. */
                    COVERAGE_INC(ukey_dp_change);
                    memset(&ukey->stats, 0, sizeof ukey->stats);
                }
                ukey->dp_layer = f->attrs.dp_layer;
            }

            already_dumped = ukey->dump_seq == dump_seq;
            if (already_dumped) {
                /* The flow has already been handled during this flow dump
                 * operation. Skip it. */
                if (ukey->xcache) {
                    COVERAGE_INC(dumped_duplicate_flow);
                } else {
                    COVERAGE_INC(dumped_new_flow);
                }
                ovs_mutex_unlock(&ukey->mutex);
                continue;
            }

            /* An inconsistent ukey is deleted without revalidating it. */
            if (ukey->state == UKEY_INCONSISTENT) {
                ukey->dump_seq = dump_seq;
                reval_op_init(&ops[n_ops++], UKEY_DELETE, udpif, ukey,
                              &recircs, &odp_actions);
                ovs_mutex_unlock(&ukey->mutex);
                COVERAGE_INC(dumped_inconsistent_flow);
                continue;
            }

            if (ukey->state <= UKEY_OPERATIONAL) {
                /* The flow is now confirmed to be in the datapath. */
                transition_ukey(ukey, UKEY_OPERATIONAL);
            } else {
                VLOG_INFO("Unexpected ukey transition from state %d "
                          "(last transitioned from thread %u at %s)",
                          ukey->state, ukey->state_thread, ukey->state_where);
                ovs_mutex_unlock(&ukey->mutex);
                continue;
            }

            /* The datapath reported no 'used' time, so derive one from the
             * ukey. */
            if (!used) {
                used = udpif_update_used(udpif, ukey, &stats);
            }
            if (kill_them_all || (used && used < now - max_idle)) {
                result = UKEY_DELETE;
                del_reason = (kill_them_all) ? FDR_FLOW_LIMIT : FDR_FLOW_IDLE;
            } else {
                result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
                                         reval_seq, &recircs, &del_reason);
            }
            /* Mark the ukey as handled in this dump. */
            ukey->dump_seq = dump_seq;

            if (netdev_is_offload_rebalance_policy_enabled() &&
                result != UKEY_DELETE) {
                udpif_update_flow_pps(udpif, ukey, f);
            }

            OVS_USDT_PROBE(revalidate, flow_result, udpif, ukey, result,
                           del_reason);
            if (result != UKEY_KEEP) {
                /* Takes ownership of 'recircs'. */
                reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
                              &odp_actions);
            }
            ovs_mutex_unlock(&ukey->mutex);
        }

        if (n_ops) {
            /* Push datapath ops but defer ukey deletion to 'sweep' phase. */
            push_dp_ops(udpif, ops, n_ops);
        }
        ovsrcu_quiesce();
    }
    dpif_flow_dump_thread_destroy(dump_thread);
    ofpbuf_uninit(&odp_actions);
}

/* Pauses the 'revalidator', can only proceed after main thread
 * calls udpif_resume_revalidators(). */
static void
revalidator_pause(struct revalidator *revalidator)
{
    /* The first block is for sync'ing the pause with main thread. */
    ovs_barrier_block(&revalidator->udpif->pause_barrier);
    /* The second block is for pausing until main thread resumes. */
    ovs_barrier_block(&revalidator->udpif->pause_barrier);
}

/* Walks the umaps that belong to 'revalidator' -- every 'n_revalidators'-th
 * umap, starting at the revalidator's own index -- and cleans up their ukeys.
 *
 * For each ukey whose mutex can be taken without blocking:
 *
 *     - If 'purge' is true, ukeys in state UKEY_OPERATIONAL, UKEY_INCONSISTENT
 *       or UKEY_VISIBLE get a datapath delete queued.
 *
 *     - Otherwise, UKEY_INCONSISTENT ukeys are deleted, and UKEY_OPERATIONAL
 *       ukeys whose 'dump_seq' and 'reval_seq' are both stale are
 *       revalidated.  A ukey absent from 4 dumps in a row is deleted.
 *
 *     - ukeys already in UKEY_EVICTED are removed from the umap.
 *
 * Queued operations are pushed in batches of REVALIDATE_MAX_BATCH through
 * push_ukey_ops(). */
static void
revalidator_sweep__(struct revalidator *revalidator, bool purge)
{
    struct udpif *udpif;
    uint64_t dump_seq, reval_seq;
    int slice;

    udpif = revalidator->udpif;
    dump_seq = seq_read(udpif->dump_seq);
    reval_seq = seq_read(udpif->reval_seq);
    /* Index of this revalidator within 'udpif->revalidators'. */
    slice = revalidator - udpif->revalidators;
    ovs_assert(slice < udpif->n_revalidators);

    for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
        uint64_t odp_actions_stub[1024 / 8];
        struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);

        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        struct udpif_key *ukey;
        struct umap *umap = &udpif->ukeys[i];
        size_t n_ops = 0;

        CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
            enum flow_del_reason del_reason = FDR_NONE;
            enum ukey_state ukey_state;

            /* Handler threads could be holding a ukey lock while it installs a
             * new flow, so don't hang around waiting for access to it. */
            if (ovs_mutex_trylock(&ukey->mutex)) {
                COVERAGE_INC(upcall_ukey_contention);
                continue;
            }
            /* Snapshot the state; it is consulted again after unlocking. */
            ukey_state = ukey->state;
            if (ukey_state == UKEY_OPERATIONAL
                || (ukey_state == UKEY_INCONSISTENT)
                || (ukey_state == UKEY_VISIBLE && purge)) {
                struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
                bool seq_mismatch = (ukey->dump_seq != dump_seq
                                     && ukey->reval_seq != reval_seq);
                enum reval_result result;

                if (purge || ukey_state == UKEY_INCONSISTENT) {
                    result = UKEY_DELETE;
                    del_reason = purge ? FDR_PURGE : FDR_UPDATE_FAIL;
                } else if (!seq_mismatch) {
                    result = UKEY_KEEP;
                } else {
                    /* Revalidate against a copy of the ukey's own stats. */
                    struct dpif_flow_stats stats;
                    COVERAGE_INC(revalidate_missed_dp_flow);
                    memcpy(&stats, &ukey->stats, sizeof stats);
                    result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
                                             reval_seq, &recircs, &del_reason);
                }

                if (ukey->dump_seq != dump_seq) {
                    ukey->missed_dumps++;
                    if (ukey->missed_dumps >= 4) {
                        /* If the flow was not dumped for 4 revalidator rounds,
                         * we can assume the datapath flow no longer exists
                         * and the ukey should be deleted. */
                        COVERAGE_INC(revalidate_missing_dp_flow);
                        del_reason = FDR_FLOW_MISSING_DP;
                        result = UKEY_DELETE;
                    }
                } else {
                    ukey->missed_dumps = 0;
                }

                if (result != UKEY_KEEP) {
                    /* Clears 'recircs' if filled by revalidate_ukey(). */
                    reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
                                  &odp_actions);
                }
                OVS_USDT_PROBE(revalidator_sweep__, flow_sweep_result, udpif,
                               ukey, result, del_reason);
            }
            ovs_mutex_unlock(&ukey->mutex);

            if (ukey_state == UKEY_EVICTED) {
                /* The common flow deletion case involves deletion of the flow
                 * during the dump phase and ukey deletion here. */
                ovs_mutex_lock(&umap->mutex);
                ukey_delete(umap, ukey);
                ovs_mutex_unlock(&umap->mutex);
            }

            /* At most one op is queued per ukey, so flushing whenever the
             * batch is full keeps 'ops' within bounds. */
            if (n_ops == REVALIDATE_MAX_BATCH) {
                /* Update/delete missed flows and clean up corresponding ukeys
                 * if necessary. */
                push_ukey_ops(udpif, umap, ops, n_ops);
                n_ops = 0;
            }
        }

        /* Flush the final, partially filled batch for this umap. */
        if (n_ops) {
            push_ukey_ops(udpif, umap, ops, n_ops);
        }

        ofpbuf_uninit(&odp_actions);
        ovsrcu_quiesce();
    }
}

/* Sweeps the ukeys owned by 'revalidator' without purging, i.e. calls
 * revalidator_sweep__() with 'purge' set to false. */
static void
revalidator_sweep(struct revalidator *revalidator)
{
    revalidator_sweep__(revalidator, false);
}

/* Purges the ukeys owned by 'revalidator', i.e. calls revalidator_sweep__()
 * with 'purge' set to true. */
static void
revalidator_purge(struct revalidator *revalidator)
{
    revalidator_sweep__(revalidator, true);
}

/* In reaction to dpif purge, purges all 'ukey's with same 'pmd_id'.
 *
 * 'aux' is the udpif.  Its revalidators are paused for the duration of the
 * purge.  Each matching ukey gets a datapath delete queued and is moved to
 * UKEY_EVICTING; the deletes are pushed per umap in batches of at most
 * REVALIDATE_MAX_BATCH. */
static void
dp_purge_cb(void *aux, unsigned pmd_id)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct udpif *udpif = aux;

    udpif_pause_revalidators(udpif);
    for (size_t i = 0; i < N_UMAPS; i++) {
        struct umap *umap = &udpif->ukeys[i];
        struct ukey_op ops[REVALIDATE_MAX_BATCH];
        struct udpif_key *ukey;
        size_t n_ops = 0;

        CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
            if (ukey->pmd_id != pmd_id) {
                continue;
            }

            delete_op_init(udpif, &ops[n_ops++], ukey);
            transition_ukey(ukey, UKEY_EVICTING);

            /* Flush as soon as the batch is full. */
            if (n_ops == REVALIDATE_MAX_BATCH) {
                push_ukey_ops(udpif, umap, ops, n_ops);
                n_ops = 0;
            }
        }

        /* Flush whatever is left for this umap. */
        if (n_ops) {
            push_ukey_ops(udpif, umap, ops, n_ops);
        }

        ovsrcu_quiesce();
    }
    udpif_resume_revalidators(udpif);
}

/* unixctl handler that replies with a summary of every udpif: flow counts and
 * flow limit, the offloaded flow count when the datapath can provide it, the
 * dump duration, whether UFID is in use, and the number of ukeys handled by
 * each revalidator. */
static void
upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    struct udpif *udpif;

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        uint64_t n_offloaded_flows;
        unsigned int flow_limit;
        bool ufid_enabled;

        atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
        ufid_enabled = udpif_use_ufid(udpif);

        ds_put_format(&reply, "%s:\n", dpif_name(udpif->dpif));
        ds_put_format(&reply, "  flows         : (current %lu)"
            " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
            udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
        /* The offloaded count is only printed when the query succeeds. */
        if (!dpif_get_n_offloaded_flows(udpif->dpif, &n_offloaded_flows)) {
            ds_put_format(&reply, "  offloaded flows : %"PRIu64"\n",
                          n_offloaded_flows);
        }
        ds_put_format(&reply, "  dump duration : %lldms\n",
                      udpif->dump_duration);
        ds_put_format(&reply, "  ufid enabled : ");
        if (ufid_enabled) {
            ds_put_format(&reply, "true\n");
        } else {
            ds_put_format(&reply, "false\n");
        }
        ds_put_char(&reply, '\n');

        /* Revalidator 'i' owns umaps i, i + n_revalidators, ... */
        for (size_t i = 0; i < udpif->n_revalidators; i++) {
            struct revalidator *revalidator = &udpif->revalidators[i];
            int elements = 0;

            for (int j = i; j < N_UMAPS; j += udpif->n_revalidators) {
                elements += cmap_count(&udpif->ukeys[j].cmap);
            }
            ds_put_format(&reply, "  %u: (keys %d)\n", revalidator->id,
                          elements);
        }
    }

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}

/* Disable using the megaflows.
 *
 * Clears 'enable_megaflows', flushes all datapaths through
 * udpif_flush_all_datapaths() and then replies on 'conn'.
 *
 * This command is only needed for advanced debugging, so it's not
 * documented in the man page. */
static void
upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
                                 int argc OVS_UNUSED,
                                 const char *argv[] OVS_UNUSED,
                                 void *aux OVS_UNUSED)
{
    atomic_store_relaxed(&enable_megaflows, false);
    udpif_flush_all_datapaths();
    unixctl_command_reply(conn, "megaflows disabled");
}

/* Re-enable using megaflows.
 *
 * Sets 'enable_megaflows', flushes all datapaths through
 * udpif_flush_all_datapaths() and then replies on 'conn'.
 *
 * This command is only needed for advanced debugging, so it's not
 * documented in the man page. */
static void
upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
                                int argc OVS_UNUSED,
                                const char *argv[] OVS_UNUSED,
                                void *aux OVS_UNUSED)
{
    atomic_store_relaxed(&enable_megaflows, true);
    udpif_flush_all_datapaths();
    unixctl_command_reply(conn, "megaflows enabled");
}

/* Disable skipping flow attributes during flow dump.
 *
 * Only clears 'enable_ufid' and replies on 'conn'; no datapath flush is
 * performed here.
 *
 * This command is only needed for advanced debugging, so it's not
 * documented in the man page. */
static void
upcall_unixctl_disable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    atomic_store_relaxed(&enable_ufid, false);
    unixctl_command_reply(conn, "Datapath dumping tersely using UFID disabled");
}

/* Re-enable skipping flow attributes during flow dump.
 *
 * Only sets 'enable_ufid' and replies on 'conn'; no datapath flush is
 * performed here.
 *
 * This command is only needed for advanced debugging, so it's not documented
 * in the man page. */
static void
upcall_unixctl_enable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
                          const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    atomic_store_relaxed(&enable_ufid, true);
    unixctl_command_reply(conn, "Datapath dumping tersely using UFID enabled "
                                "for supported datapaths");
}

/* Set the flow limit.
 *
 * Parses argv[1] as a base-10 number and stores it as the flow limit of every
 * udpif.  If the argument is not a complete number, is negative, or does not
 * fit in an 'unsigned int', replies with an error and leaves all limits
 * unchanged.
 *
 * This command is only needed for advanced debugging, so it's not
 * documented in the man page. */
static void
upcall_unixctl_set_flow_limit(struct unixctl_conn *conn,
                              int argc OVS_UNUSED,
                              const char *argv[],
                              void *aux OVS_UNUSED)
{
    const char *arg = argv[1];
    unsigned int flow_limit;
    struct udpif *udpif;
    long long int value;
    char *tail;

    /* atoi() cannot report malformed input, silently turns a negative
     * argument into a huge unsigned limit and has undefined behavior on
     * overflow, so parse strictly instead. */
    errno = 0;
    value = strtoll(arg, &tail, 10);
    flow_limit = value;
    if (tail == arg || *tail != '\0' || errno == ERANGE
        || value < 0 || flow_limit != value) {
        unixctl_command_reply_error(conn, "invalid flow limit");
        return;
    }

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        atomic_store_relaxed(&udpif->flow_limit, flow_limit);
    }

    struct ds ds = DS_EMPTY_INITIALIZER;

    ds_put_format(&ds, "set flow_limit to %u\n", flow_limit);
    unixctl_command_reply(conn, ds_cstr(&ds));
    ds_destroy(&ds);
}

/* unixctl handler that parks 'conn' on the only udpif: the connection is
 * appended to 'udpif->conns' and 'udpif->conn_seq' is set to the current
 * value of the dump sequence.  No reply is sent from here in that case.
 *
 * Replies with an error when 'all_udpifs' does not hold exactly one udpif. */
static void
upcall_unixctl_dump_wait(struct unixctl_conn *conn,
                         int argc OVS_UNUSED,
                         const char *argv[] OVS_UNUSED,
                         void *aux OVS_UNUSED)
{
    struct udpif *udpif = NULL;

    if (!ovs_list_is_singleton(&all_udpifs)) {
        unixctl_command_reply_error(conn, "can't wait on multiple udpifs.");
        return;
    }

    udpif = OBJECT_CONTAINING(ovs_list_front(&all_udpifs), udpif, list_node);
    udpif->conn_seq = seq_read(udpif->dump_seq);

    /* Grow the array by one slot and store 'conn' in it. */
    udpif->conns = xrealloc(udpif->conns,
                            (udpif->n_conns + 1) * sizeof *udpif->conns);
    udpif->conns[udpif->n_conns++] = conn;
}

/* unixctl handler that purges the ukeys of every revalidator of every udpif,
 * then sends an empty reply.
 *
 * If a udpif's 'pause_latch' is not set, its revalidators are paused around
 * the purge and resumed afterwards.  If the latch is already set, they are
 * left as they are. */
static void
upcall_unixctl_purge(struct unixctl_conn *conn, int argc OVS_UNUSED,
                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct udpif *udpif;

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        bool paused_here = !latch_is_set(&udpif->pause_latch);

        if (paused_here) {
            udpif_pause_revalidators(udpif);
        }

        for (int n = 0; n < udpif->n_revalidators; n++) {
            revalidator_purge(&udpif->revalidators[n]);
        }

        /* Only undo a pause that this handler started. */
        if (paused_here) {
            udpif_resume_revalidators(udpif);
        }
    }
    unixctl_command_reply(conn, "");
}

/* unixctl handler that pauses the revalidators of every udpif in
 * 'all_udpifs' and then sends an empty reply on 'conn'. */
static void
upcall_unixctl_pause(struct unixctl_conn *conn, int argc OVS_UNUSED,
                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct udpif *udpif;

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        udpif_pause_revalidators(udpif);
    }
    unixctl_command_reply(conn, "");
}

/* unixctl handler that resumes the revalidators of every udpif in
 * 'all_udpifs' and then sends an empty reply on 'conn'. */
static void
upcall_unixctl_resume(struct unixctl_conn *conn, int argc OVS_UNUSED,
                      const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct udpif *udpif;

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        udpif_resume_revalidators(udpif);
    }
    unixctl_command_reply(conn, "");
}

/* unixctl handler that formats the translation cache of the ukey identified
 * by the UFID in argv[1], once for every udpif.
 *
 * An optional argv[2] of the form "pmd=PMD-ID" selects the PMD id used for
 * the lookup.  Without it, PMD_ID_NULL is used for datapaths of type "system"
 * and NON_PMD_CORE_ID for all others.
 *
 * Replies with an error if the UFID or the pmd argument cannot be parsed. */
static void
upcall_unixctl_ofproto_detrace(struct unixctl_conn *conn, int argc,
                               const char *argv[], void *aux OVS_UNUSED)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    const char *pmd_arg = NULL;
    struct udpif *udpif;
    unsigned int pmd_id;
    ovs_u128 ufid;

    if (odp_ufid_from_string(argv[1], &ufid) <= 0) {
        unixctl_command_reply_error(conn, "failed to parse ufid");
        return;
    }

    if (argc == 3) {
        pmd_arg = argv[2];
        if (!ovs_scan(pmd_arg, "pmd=%d", &pmd_id)) {
            unixctl_command_reply_error(conn,
                                        "Invalid pmd argument format. "
                                        "Expecting 'pmd=PMD-ID'");
            return;
        }
    }

    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
        struct udpif_key *ukey;
        bool in_use;

        /* No explicit PMD: pick the default for this datapath type. */
        if (!pmd_arg) {
            const char *type = dpif_normalize_type(dpif_type(udpif->dpif));

            pmd_id = strcmp(type, "system") ? NON_PMD_CORE_ID : PMD_ID_NULL;
        }

        ukey = ukey_lookup(udpif, &ufid, pmd_id);
        if (!ukey) {
            ds_put_format(&reply, "UFID was not found for %s\n",
                          dpif_name(udpif->dpif));
            continue;
        }

        ovs_mutex_lock(&ukey->mutex);
        /* It only makes sense to format rules for ukeys that are (still)
         * in use. */
        in_use = (ukey->state == UKEY_VISIBLE
                  || ukey->state == UKEY_OPERATIONAL);
        if (in_use && ukey->xcache) {
            xlate_xcache_format(&reply, ukey->xcache);
        } else {
            ds_put_format(&reply, "Cache was not found for %s\n",
                          dpif_name(udpif->dpif));
        }
        ovs_mutex_unlock(&ukey->mutex);
    }

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}


/* Comparison function for sorting an array of 'struct udpif_key *'; 'elem1'
 * and 'elem2' each point to one array element.
 *
 * Flows are sorted in the following order:
 *
 *     - by input netdev (pointer value), ascending;
 *
 *     - flow state: offloaded flows before kernel-path flows;
 *
 *     - 'flow_pps_rate': ascending among offloaded flows, descending among
 *       the others.
 *
 * Returns a negative value, zero or a positive value if the first flow sorts
 * before, equal to or after the second one. */
static int
flow_compare_rebalance(const void *elem1, const void *elem2)
{
    const struct udpif_key *f1 = *(const struct udpif_key *const *) elem1;
    const struct udpif_key *f2 = *(const struct udpif_key *const *) elem2;
    bool f1_slower;

    if (f1->in_netdev < f2->in_netdev) {
        return -1;
    } else if (f1->in_netdev > f2->in_netdev) {
        return 1;
    }

    if (f1->offloaded != f2->offloaded) {
        return f2->offloaded - f1->offloaded;
    }

    /* Equal keys must compare equal, otherwise cmp(a, b) and cmp(b, a) both
     * claim "greater" and the ordering is inconsistent. */
    if (f1->flow_pps_rate == f2->flow_pps_rate) {
        return 0;
    }

    /* Compare the rates directly: the difference of two unsigned rates does
     * not survive a conversion to a signed type when it is large. */
    f1_slower = f1->flow_pps_rate < f2->flow_pps_rate;
    if (f1->offloaded) {
        return f1_slower ? -1 : 1;
    }
    return f1_slower ? 1 : -1;
}

/* Insert flows from pending array during rebalancing */
static int
rebalance_insert_pending(struct udpif *udpif, struct udpif_key **pending_flows,
                         int pending_count, int insert_count,
                         uint64_t rate_threshold)
{
    int count = 0;

    for (int i = 0; i < pending_count; i++) {
        struct udpif_key *flow = pending_flows[i];
        int err;

        /* Stop offloading pending flows if the insert count is
         * reached and the flow rate is less than the threshold
         */
        if (count >= insert_count && flow->flow_pps_rate < rate_threshold) {
                break;
        }

        /* Offload the flow to netdev */
        err = udpif_flow_program(udpif, flow, DPIF_OFFLOAD_ALWAYS);

        if (err == ENOSPC) {
            /* Stop if we are out of resources */
            break;
        }

        if (err) {
            continue;
        }

        /* Offload succeeded; delete it from the kernel datapath */
        udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_NEVER);

        /* Change the state of the flow, adjust dpif counters */
        flow->offloaded = true;

        udpif_set_ukey_backlog_packets(flow);
        count++;
    }

    return count;
}

/* Moves the first 'offload_count' flows of 'offloaded_flows' from the netdev
 * back to the kernel datapath during rebalancing.
 *
 * A flow is first installed in the kernel path and only then removed from
 * the netdev.  If the kernel install fails the flow is left alone; if the
 * removal fails, the kernel copy is removed again and the flow stays
 * offloaded. */
static void
rebalance_remove_offloaded(struct udpif *udpif,
                           struct udpif_key **offloaded_flows,
                           int offload_count)
{
    for (int i = 0; i < offload_count; i++) {
        struct udpif_key *ukey = offloaded_flows[i];

        if (udpif_flow_program(udpif, ukey, DPIF_OFFLOAD_NEVER)) {
            continue;
        }

        if (udpif_flow_unprogram(udpif, ukey, DPIF_OFFLOAD_ALWAYS)) {
            /* Roll back the kernel install made just above. */
            udpif_flow_unprogram(udpif, ukey, DPIF_OFFLOAD_NEVER);
            continue;
        }

        udpif_set_ukey_backlog_packets(ukey);
        ukey->offloaded = false;
    }
}

/*
 * Rebalance offloaded flows on a netdev that's in OOR state.
 *
 * 'offloaded_flows' and 'pending_flows' are the netdev's offloaded and
 * not-yet-offloaded flows, 'offload_count' and 'pending_count' their sizes.
 * The churn loop in phase 2 pairs them up index by index.
 *
 * Phase 1: try to offload every pending flow, in case resources became
 * available in the meantime.  If all of them get offloaded, the device is no
 * longer in OOR state, nothing needs rebalancing and false is returned.
 *
 * Phase 2: walk both arrays in step, counting the leading pairs in which the
 * pending flow has a strictly higher pps-rate than the offloaded flow, up to
 * OFFL_REBAL_MAX_CHURN pairs.  That many offloaded flows are moved to the
 * kernel datapath, and pending flows are then offloaded in their place.  The
 * entire rebalancing process is repeated in the next iteration.
 *
 * Returns true whenever phase 2 was reached, even if nothing was churned.
 */
static bool
rebalance_device(struct udpif *udpif, struct udpif_key **offloaded_flows,
                 int offload_count, struct udpif_key **pending_flows,
                 int pending_count)
{
    uint64_t rate_threshold;
    int n_inserted;
    int n_churn;

    /* Phase 1 */
    n_inserted = rebalance_insert_pending(udpif, pending_flows,
                                          pending_count, pending_count, 0);
    if (n_inserted) {
        VLOG_DBG("Offload rebalance: Phase1: inserted %d pending flows",
                  n_inserted);
    }

    /* Skip past the flows that phase 1 offloaded. */
    pending_flows += n_inserted;
    pending_count -= n_inserted;
    if (pending_count == 0) {
        return false;
    }

    /* Phase 2; determine how many offloaded flows to churn. */
#define OFFL_REBAL_MAX_CHURN    1024
    for (n_churn = 0;
         n_churn < OFFL_REBAL_MAX_CHURN && n_churn < offload_count
         && n_churn < pending_count;
         n_churn++) {
        if (pending_flows[n_churn]->flow_pps_rate <=
            offloaded_flows[n_churn]->flow_pps_rate) {
            break;
        }
    }

    /* Bail early if nothing to churn */
    if (!n_churn) {
        return true;
    }
    VLOG_DBG("Offload rebalance: Phase2: removing %d offloaded flows",
              n_churn);

    /* Remove offloaded flows, then skip past them. */
    rebalance_remove_offloaded(udpif, offloaded_flows, n_churn);
    offloaded_flows += n_churn;
    offload_count -= n_churn;

    /* Replace them with pending flows.  Past 'n_churn' insertions, only
     * flows at least as fast as the first remaining offloaded flow are still
     * considered. */
    rate_threshold = offload_count ? offloaded_flows[0]->flow_pps_rate : 0;
    n_inserted = rebalance_insert_pending(udpif, pending_flows,
                                          pending_count, n_churn,
                                          rate_threshold);
    if (n_inserted) {
        VLOG_DBG("Offload rebalance: Phase2: inserted %d pending flows",
                  n_inserted);
    }

    return true;
}

/*
 * Appends 'ukey' to the sort_flows[] array.
 *
 * When all '*alloc_flow_count' slots are already in use the array is grown
 * through x2nrealloc(), which is handed 'alloc_flow_count' so that it can
 * record the new capacity.  '*total_flow_count' is incremented to account
 * for the new entry.
 *
 * Returns the array to use from now on; it may differ from the 'sort_flows'
 * pointer that was passed in.
 */
static struct udpif_key **
udpif_add_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
                    size_t *alloc_flow_count, struct udpif_key *ukey)
{
    size_t slot = *total_flow_count;

    if (slot >= *alloc_flow_count) {
        sort_flows = x2nrealloc(sort_flows, alloc_flow_count,
                                sizeof *sort_flows);
    }

    sort_flows[slot] = ukey;
    *total_flow_count = slot + 1;

    return sort_flows;
}

/*
 * Adds 'ukey' to sort_flows[] if its input netdev is in 'OOR' state, and
 * accounts for it in that netdev's offloaded or pending flow counter.
 *
 * A flow without an input netdev is ignored.  If the input netdev is not in
 * OOR state, ukey_netdev_unref() is called on the ukey and the flow is
 * skipped.  '*oor_netdev_count' is incremented when the first offloaded flow
 * is counted against a netdev.
 *
 * Returns the (possibly reallocated) sort_flows[] array.
 */
static struct udpif_key **
udpif_build_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
                      size_t *alloc_flow_count, struct udpif_key *ukey,
                      int *oor_netdev_count)
{
    struct netdev *in_dev = ukey->in_netdev;

    /* A flow with no input netdev cannot be rebalanced. */
    if (!in_dev) {
        return sort_flows;
    }

    /* Only flows whose input netdev is in OOR state are collected. */
    if (!netdev_get_hw_info(in_dev, HW_INFO_TYPE_OOR)) {
        ukey_netdev_unref(ukey);
        return sort_flows;
    }

    sort_flows = udpif_add_oor_flows(sort_flows, total_flow_count,
                                     alloc_flow_count, ukey);

    if (!ukey->offloaded) {
        /* Pending flow: bump the netdev's pending counter. */
        int n_pending = netdev_get_hw_info(in_dev, HW_INFO_TYPE_PEND_COUNT);

        ovs_assert(n_pending >= 0);
        netdev_set_hw_info(in_dev, HW_INFO_TYPE_PEND_COUNT, n_pending + 1);
        return sort_flows;
    }

    /* Offloaded flow: bump the netdev's offloaded counter. */
    int n_offloaded = netdev_get_hw_info(in_dev, HW_INFO_TYPE_OFFL_COUNT);

    ovs_assert(n_offloaded >= 0);
    if (n_offloaded == 0) {
        /* First offloaded flow seen on this netdev. */
        (*oor_netdev_count)++;
    }
    netdev_set_hw_info(in_dev, HW_INFO_TYPE_OFFL_COUNT, n_offloaded + 1);

    return sort_flows;
}

/*
 * Rebalance offloaded flows on HW netdevs that are in OOR state.
 *
 * Walks every ukey in 'udpif', collects those whose input netdev is in OOR
 * state, sorts them with flow_compare_rebalance() and then hands each OOR
 * netdev's slice of offloaded and pending flows to rebalance_device().  The
 * netdev's OOR flag is set to the value rebalance_device() returns, and its
 * offloaded/pending counters are reset to zero afterwards.  Finally,
 * ukey_netdev_unref() is called on every collected ukey.
 */
static void
udpif_flow_rebalance(struct udpif *udpif)
{
    struct udpif_key **sort_flows = NULL;
    size_t alloc_flow_count = 0;
    size_t total_flow_count = 0;
    int oor_netdev_count = 0;
    int offload_index = 0;
    int pending_index;

    /* Collect flows (offloaded and pending) that reference OOR netdevs */
    for (size_t i = 0; i < N_UMAPS; i++) {
        struct udpif_key *ukey;
        struct umap *umap = &udpif->ukeys[i];

        CMAP_FOR_EACH (ukey, cmap_node, &umap->cmap) {
            ukey_to_flow_netdev(udpif, ukey);
            sort_flows = udpif_build_oor_flows(sort_flows, &total_flow_count,
                                               &alloc_flow_count, ukey,
                                               &oor_netdev_count);
        }
    }

    /*
     * Nothing was collected, so there is nothing to sort, rebalance or
     * unref.  Return before qsort(): 'sort_flows' is still NULL here and
     * passing a null base pointer to qsort() is undefined behavior even
     * when the element count is zero.
     */
    if (!total_flow_count) {
        free(sort_flows);
        return;
    }

    /* Sort flows by OOR netdevs, state (offloaded/pending) and pps-rate  */
    qsort(sort_flows, total_flow_count, sizeof(struct udpif_key *),
          flow_compare_rebalance);

    /*
     * We now have flows referencing OOR netdevs, that are sorted. We also
     * have a count of offloaded and pending flows on each of the netdevs
     * that are in OOR state. Now rebalance each oor-netdev.
     *
     * NOTE(review): 'oor_netdev_count' only counts netdevs that contributed
     * at least one offloaded flow, so this walk presumably relies on every
     * netdev in sort_flows[] having one -- confirm against
     * flow_compare_rebalance() and the OOR state handling.
     */
    while (oor_netdev_count) {
        struct netdev *netdev;
        int offload_count;
        int pending_count;
        bool oor;

        /* Each netdev's slice starts with its offloaded flows. */
        netdev = sort_flows[offload_index]->in_netdev;
        ovs_assert(netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR) == true);
        VLOG_DBG("Offload rebalance: netdev: %s is OOR", netdev->name);

        offload_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT);
        pending_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT);
        pending_index = offload_index + offload_count;

        oor = rebalance_device(udpif,
                               &sort_flows[offload_index], offload_count,
                               &sort_flows[pending_index], pending_count);
        netdev_set_hw_info(netdev, HW_INFO_TYPE_OOR, oor);

        /* Advance to the next netdev's slice and reset this one's counters. */
        offload_index = pending_index + pending_count;
        netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, 0);
        netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, 0);
        oor_netdev_count--;
    }

    /* 'i' is size_t to match 'total_flow_count' (no signed/unsigned mix). */
    for (size_t i = 0; i < total_flow_count; i++) {
        struct udpif_key *ukey = sort_flows[i];
        ukey_netdev_unref(ukey);
    }
    free(sort_flows);
}

static int
udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
                   enum dpif_offload_type offload_type)
{
    struct dpif_op *opsp;
    struct ukey_op uop;

    opsp = &uop.dop;
    put_op_init(&uop, ukey, DPIF_FP_CREATE);
    dpif_operate(udpif->dpif, &opsp, 1, offload_type);

    return opsp->error;
}

static int
udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
                     enum dpif_offload_type offload_type)
{
    struct dpif_op *opsp;
    struct ukey_op uop;

    opsp = &uop.dop;
    delete_op_init(udpif, &uop, ukey);
    dpif_operate(udpif->dpif, &opsp, 1, offload_type);

    return opsp->error;
}
-												ofproto-dpif-upcall: Avoid double-delete of ukeys.

revalidate_sweep__() has two cases where it calls ukey_delete() to
remove a ukey from the umap via cmap_remove().  The first case is a direct
call to ukey_delete(), when !flow_exists.  The second case is an indirect
call via push_ukey_ops(), when result != UKEY_KEEP.  If both of these
conditions are simultaneously true, however, the code would call
ukey_delete() twice, causing an assertion failure in the second call.  This
commit fixes the problem by eliminating one of the calls.

The version tested by Ben Warren differs from this version, see:
    http://openvswitch.org/pipermail/dev/2016-January/064117.html

Reported-by: Keith Holleman <keith.holleman@gmail.com>
Reported-at: http://openvswitch.org/pipermail/discuss/2015-December/019772.html
CC: Joe Stringer <joe@ovn.org>
VMware-BZ: #1579057
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Ben Warren <ben@skyportsystems.com>

											
										
										
											2016-01-06 15:44:39 -08:00
+								/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.  */
 								#include <config.h>
 								#include "ofproto-dpif-upcall.h"
 								#include <errno.h>
 								#include <stdbool.h>
 								#include <inttypes.h>
-												ofproto, ofp-util: Begin disentangling packet-in wire format and handling.

struct ofputil_packet_in mixes data included in OpenFlow packet_in messages
with data that is used internally by ofproto and connmgr to queue and route
packet_ins.  This commit begins disentangling these purposes by adding a
new struct ofproto_packet_in that wraps struct ofputil_packet_in.  Adding
this new level of indirection causes a lot of code churn, so this commit
mainly takes care of that to make the remaining changes easier to read.

This commit does move the list node used for queuing packet_ins into the
new wrapper structure.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-10-22 16:16:31 -07:00
+								#include "connmgr.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "coverage.h"
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								#include "cmap.h"
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								#include "lib/dpif-provider.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "dpif.h"
-												Move lib/dynamic-string.h to include/openvswitch directory

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-03 10:20:46 -08:00
+								#include "openvswitch/dynamic-string.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "fail-open.h"
-												guarded-list: New data structure for thread-safe queue.

We already had queues that were suitable for replacement by this data
structure, and I intend to add another one later on.

flow_miss_batch_ofproto_destroyed() did not work well with the guarded-list
structure (it required either adding a lot more functions or breaking the
abstraction) so I changed the caller to just use udpif_revalidate().

Checking reval_seq at the end of handle_miss_upcalls() also didn't work
well with the abstraction, so I decided that since this was a corner case
anyway it would be acceptable to just drop those in flow_miss_batch_next().

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-09-12 17:42:23 -07:00
+								#include "guarded-list.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "latch.h"
-												list: Remove lib/list.h completely.

All code is now in include/openvswitch/list.h.

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:21 -07:00
+								#include "openvswitch/list.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "netlink.h"
-												Move lib/ofpbuf.h to include/openvswitch directory

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Acked-by: Ryan Moats <rmoats@us.ibm.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:24 -07:00
+								#include "openvswitch/ofpbuf.h"
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								#include "ofproto-dpif-ipfix.h"
 								#include "ofproto-dpif-sflow.h"
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								#include "ofproto-dpif-xlate.h"
-												ofproto-dpif-xlate: Expose xlate cache.

Later patches will need to create xlate cache entries from different
modules.  This patch refactors the xlate cache code in preparation
without any functional changes, so that the changes are clearly
visible in the following patches.

The definition of XC_ENTRY_FOR_EACH() iterator macro is changed so
that it now does not take the xlate cache pointer to unify the usage
across all call sites.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-09-14 16:51:27 -07:00
+								#include "ofproto-dpif-xlate-cache.h"
-												xlate: auto ofproto trace when recursion too deep

Usually ofproto/trace is used to debug the flow translation error.
When translation error such as recursion too deep or too many resubmit,
the issue might happen momentarily; flows causing the recursion expire
when users try to debug it.  This patch enables the ofproto trace
automatically when recursion is too deep or too many resubmit, by
invoking the translation again, and log the ofproto trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-28 16:32:27 -08:00
+								#include "ofproto-dpif-trace.h"
-												ovs-rcu: New library.

RCU allows multiple threads to read objects in parallel without any
performance penalty.  The following commit will introduce the first use.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-18 16:34:28 -07:00
+								#include "ovs-rcu.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								#include "packets.h"
-												lib: Move lib/poll-loop.h to include/openvswitch

Poll-loop is the core to implement main loop. It should be available in
libopenvswitch.

Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-11-03 13:53:53 +08:00
+								#include "openvswitch/poll-loop.h"
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								#include "seq.h"
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								#include "tunnel.h"
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								#include "unixctl.h"
-												utilities: Add revalidator measurement script and needed USDT probes.

This patch adds a Python script that can be used to analyze the
revalidator runs by providing statistics (including some real time
graphs).

The USDT events can also be captured to a file and used for
later offline analysis.

The following blog explains the Open vSwitch revalidator
implementation and how this tool can help you understand what is
happening in your system.

https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-23 12:03:29 +01:00
+								#include "openvswitch/usdt-probes.h"
-												lib: Move vlog.h to <openvswitch/vlog.h>

A new function vlog_insert_module() is introduced to avoid using
list_insert() from the vlog.h header.

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-15 14:10:38 +01:00
+								#include "openvswitch/vlog.h"
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								#include "lib/netdev-provider.h"
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								#define UPCALL_MAX_BATCH 64
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								#define REVALIDATE_MAX_BATCH 50
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								#define UINT64_THREE_QUARTERS (UINT64_MAX / 4 * 3)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
 								VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall);
-												revalidator: Distinguish new and duplicate flows.

We previously counted flows that have been installed during the current
dump as duplicates, rather than recognising them as new flows. This
patch separates the counters out for these two cases.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-26 17:28:05 +00:00
+								COVERAGE_DEFINE(dumped_duplicate_flow);
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								COVERAGE_DEFINE(dumped_inconsistent_flow);
-												revalidator: Distinguish new and duplicate flows.

We previously counted flows that have been installed during the current
dump as duplicates, rather than recognising them as new flows. This
patch separates the counters out for these two cases.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-26 17:28:05 +00:00
+								COVERAGE_DEFINE(dumped_new_flow);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								COVERAGE_DEFINE(handler_duplicate_upcall);
-												revalidator: Revalidate missed flows.

If the datapath doesn't dump a flow for some reason, and the current
dump is expected to revalidate all flows in the datapath, then perform
revalidation for those flows by fetching them during the sweep phase.
If revalidation is not required, then leave the flow in the datapath and
don't revalidate it.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-08 07:04:05 +00:00
+								COVERAGE_DEFINE(revalidate_missed_dp_flow);
-												ofproto-dpif-upcall: Avoid stale ukeys leaks.

It is observed in some environments that there are much more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch tracks the number of consecutive missed dumps. If four dumps
are missed in a row, it is assumed that the datapath flow no longer
exists, and the ukey can be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-29 09:00:06 +02:00
+								COVERAGE_DEFINE(revalidate_missing_dp_flow);
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								COVERAGE_DEFINE(ukey_dp_change);
 								COVERAGE_DEFINE(ukey_invalid_stat_reset);
-												ofproto-dpif-upcall: Fix ukey installation failure logs and counters.

ukey_install() returns boolean signaling if the ukey was installed
or not.  Installation may fail for a few reasons:

 1. Conflicting ukey.
 2. Mutex contention while trying to replace existing ukey.
 3. The same ukey already exists and active.

Only the first case here signals an actual problem.  Third one is
a little odd for userspace datapath, but harmless.  Second is the
most common one that can easily happen during normal operation
since other threads like revalidators may be currently working on
this ukey preventing an immediate access.

Since only the first case is actually worth logging and it already
has its own log message, removing the 'upcall installation fails'
warning from the upcall_cb().  This should fix most of the random
failures of userspace system tests in CI.

While at it, also fixing coverage counters.  Mutex contention was
mistakenly counted as a duplicate upcall.  ukey contention for
revalidators was counted only in one of two places.

New counter added for the ukey contention on replace.  We should
not re-use existing upcall_ukey_contention counter for this, since
it may lead to double counting.

Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Fixes: 9cec8274ed9a ("ofproto-dpif-upcall: Add VLOG_WARN_RL logs for upcall_cb() error.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-04 14:09:37 +02:00
+								COVERAGE_DEFINE(ukey_replace_contention);
-												ofproto-dpif-upcall: Add flow_limit coverage counters.

Add new coverage counters that might help debugging flow_limit
related issues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:56 +01:00
+								COVERAGE_DEFINE(upcall_flow_limit_grew);
-												ofproto: report coverage on hitting datapath flow limit

Whenever the number of flows in the datapath crosses above
the flow limit set/autoconfigured, it is helpful to report
this event through coverage counter for an operator/devops
engineer to know and take proactive corrections in the
switch configuration.

Today, these events are reported in ovs vswitch log when
a new flow can not be inserted in upcall processing in which
case ovs writes a warning, otherwise an auto correction
is made by ovs to flush old flows without any intimation at all.

Signed-off-by: Gowrishankar Muthukrishnan <gmuthukr@redhat.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-20 19:13:42 +05:30
+								COVERAGE_DEFINE(upcall_flow_limit_hit);
-												ofproto-dpif-upcall: Log the emergency flow flush.

When the number of flows in the datapath reaches twice the
maximum, revalidators will delete all flows as an emergency
action to recover. In that case, log a message with values
and increase a coverage counter.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-09-30 16:23:59 -03:00
+								COVERAGE_DEFINE(upcall_flow_limit_kill);
-												ofproto-dpif-upcall: Add flow_limit coverage counters.

Add new coverage counters that might help debugging flow_limit
related issues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:56 +01:00
+								COVERAGE_DEFINE(upcall_flow_limit_reduced);
 								COVERAGE_DEFINE(upcall_flow_limit_scaled);
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								COVERAGE_DEFINE(upcall_ukey_contention);
 								COVERAGE_DEFINE(upcall_ukey_replace);
-												revalidator: Prevent handling the same flow twice.

When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.

This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.

The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."

This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.

We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-23 15:31:17 +12:00
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 								 * and possibly sets up a kernel flow as a cache. */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								struct handler {
 								    struct udpif *udpif;               /* Parent udpif. */
 								    pthread_t thread;                  /* Thread ID. */
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								    uint32_t handler_id;               /* Handler id. */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								};
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								/* In the absence of a multiple-writer multiple-reader datastructure for
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								 * storing udpif_keys ("ukeys"), we use a large number of cmaps, each with its
 								 * own lock for writing. */
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								#define N_UMAPS 512 /* per udpif. */
 								/* One ukey map: a cmap paired with the mutex that writers must hold.
 								 * Each udpif is meant to have N_UMAPS of these. */
 								struct umap {
 								    struct ovs_mutex mutex;            /* Take for writing to the following. */
 								    struct cmap cmap;                  /* Datapath flow keys. */
 								};
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								/* A thread that processes datapath flows, updates OpenFlow statistics, and
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								 * updates or removes them if necessary.
 								 *
 								 * Revalidator threads operate in two phases: "dump" and "sweep". In between
 								 * each phase, all revalidators sync up so that all revalidator threads are
 								 * either in one phase or the other, but not a combination.
 								 *
 								 *     During the dump phase, revalidators fetch flows from the datapath and
 								 *     attribute the statistics to OpenFlow rules. Each datapath flow has a
 								 *     corresponding ukey which caches the most recently seen statistics. If
 								 *     a flow needs to be deleted (for example, because it is unused over a
 								 *     period of time), revalidator threads may delete the flow during the
 								 *     dump phase. The datapath is not guaranteed to reliably dump all flows
 								 *     from the datapath, and there is no mapping between datapath flows to
 								 *     revalidators, so a particular flow may be handled by zero or more
 								 *     revalidators during a single dump phase. To avoid duplicate attribution
 								 *     of statistics, ukeys are never deleted during this phase.
 								 *
 								 *     During the sweep phase, each revalidator takes ownership of a different
 								 *     slice of umaps and sweeps through all ukeys in those umaps to figure out
 								 *     whether they need to be deleted. During this phase, revalidators may
 								 *     fetch individual flows which were not dumped during the dump phase to
 								 *     validate them and attribute statistics.
 								 */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								struct revalidator {
 								    struct udpif *udpif;               /* Parent udpif. */
 								    pthread_t thread;                  /* Thread ID. */
-												ovs-thread: Make caller provide thread name when creating a thread.

Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one.  This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible.  This also simplifies code, since two function calls become
only one.

This makes a few other changes to the thread creation function:

    * Since it is no longer a direct wrapper around a pthread function,
      rename it to avoid giving that impression.

    * Remove 'pthread_attr_t *' param that every caller supplied as NULL.

    * Change 'pthread *' parameter into a return value, for convenience.

The system-stats code hadn't set a thread name, so this fixes that issue.

This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself.  Setting the thread
name earlier, as in this patch, avoids the problem.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-25 17:46:21 -07:00
+								    unsigned int id;                   /* ovsthread_id_self(). */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								};
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								/* An upcall handler for ofproto_dpif.
 								 *
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								 * udpif keeps records of two kinds of logically separate units:
 								 *
 								 * upcall handling
 								 * ---------------
 								 *
 								 *    - An array of 'struct handler's for upcall handling and flow
 								 *      installation.
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								 *
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								 * flow revalidation
 								 * -----------------
 								 *
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								 *    - Revalidation threads which read the datapath flow table and maintain
 								 *      it.
 								 */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								struct udpif {
-												list: Rename struct list to struct ovs_list

struct list is a common name and can't be used in public headers.

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-15 14:10:38 +01:00
+								    struct ovs_list list_node;         /* In all_udpifs list. */
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    struct dpif *dpif;                 /* Datapath handle. */
 								    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								    struct handler *handlers;          /* Upcall handlers. */
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								    uint32_t n_handlers;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    struct revalidator *revalidators;  /* Flow revalidators. */
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								    uint32_t n_revalidators;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
 								    struct latch exit_latch;           /* Tells child threads to exit. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    /* Revalidation. */
 								    struct seq *reval_seq;             /* Incremented to force revalidation. */
 								    bool reval_exit;                   /* Set by leader on 'exit_latch. */
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator thread uses pthread_barrier_* functions in their
main loop to synchronize with leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since last call as poll interval
and issue the following warnings:

"Unreasonably long XXXXms poll interval".

To prevent it, this commit implements the barrier struct and operations
for OVS which allow thread to block on barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    long long int dump_duration;       /* Duration of the last flow dump. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    struct seq *dump_seq;              /* Increments each dump iteration. */
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								    /* These variables provide a mechanism for the main thread to pause
 								     * all revalidation without having to completely shut the threads down.
 								     * 'pause_latch' is shared between the main thread and the lead
 								     * revalidator thread, so when it is desirable to halt revalidation, the
 								     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
 								     * by revalidator threads. The lead revalidator will set 'pause' when it
 								     * observes the latch has been set, and this will cause all revalidator
 								     * threads to wait on 'pause_barrier' at the beginning of the next
 								     * revalidation round. */
 								    bool pause;                        /* Set by leader on 'pause_latch. */
 								    struct latch pause_latch;          /* Set to force revalidators pause. */
 								    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
 								                                       /* revalidators by main thread. */
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    /* There are 'N_UMAPS' maps containing 'struct udpif_key' elements.
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								     *
 								     * During the flow dump phase, revalidators insert into these with a random
 								     * distribution. During the garbage collection phase, each revalidator
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								     * takes care of garbage collecting a slice of these maps. */
 								    struct umap *ukeys;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    /* Datapath flow statistics. */
 								    unsigned int max_n_flows;
 								    unsigned int avg_n_flows;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    /* Following fields are accessed and modified by different threads. */
 								    atomic_uint flow_limit;            /* Datapath flow hard limit. */
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
 								    /* n_flows_mutex prevents multiple threads updating these concurrently. */
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_uint n_flows;               /* Number of flows in the datapath. */
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
 								    struct ovs_mutex n_flows_mutex;
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
 								    /* Following fields are accessed and modified only from the main thread. */
 								    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
 								    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
 								                                          conns[n_conns-1] was stored. */
 								    size_t n_conns;                    /* Number of connections waiting. */
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
 								    long long int offload_rebalance_time;  /* Time of last offload rebalance */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								};
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								enum upcall_type {
 								    BAD_UPCALL,                 /* Some kind of bug somewhere. */
 								    MISS_UPCALL,                /* A flow miss.  */
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    SLOW_PATH_UPCALL,           /* Slow path upcall.  */
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								    SFLOW_UPCALL,               /* sFlow sample. */
 								    FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								    IPFIX_UPCALL,               /* Per-bridge sampling. */
 								    CONTROLLER_UPCALL           /* Destined for the controller. */
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								};
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								enum reval_result {     /* Outcome of revalidating a ukey. */
 								    UKEY_KEEP,          /* Presumably: leave the datapath flow untouched -- confirm at use sites. */
 								    UKEY_DELETE,        /* Delete the datapath flow. */
 								    UKEY_MODIFY         /* Only the actions changed: re-put the flow with the MODIFY flag. */
 								};
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								struct upcall {
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    struct ofproto_dpif *ofproto;  /* Parent ofproto. */
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    const struct recirc_id_node *recirc; /* Recirculation context. */
 								    bool have_recirc_ref;                /* Reference held on recirc ctx? */
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    /* The flow and packet are only required to be constant when using
 								     * dpif-netdev.  If a modification is absolutely necessary, a const cast
 								     * may be used with other datapaths. */
 								    const struct flow *flow;       /* Parsed representation of the packet. */
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								    enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;               /* Datapath poll mode driver id. */
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers.  That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use case.
This also avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    const struct dp_packet *packet;   /* Packet associated with this upcall. */
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								    ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								    uint16_t mru;                  /* If !0, Maximum receive unit of
 								                                      fragmented IP packet */
-												ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may send an upcall with hash info;
ovs-vswitchd should get it from the upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-15 10:58:59 +08:00
+								    uint64_t hash;
-												dpif-netlink: Provide original upcall pid in 'execute' commands.

When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall.  With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet.  This allows us to send
packets from the same traffic flow through the same handler.

The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.

While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again.  And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.

However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread.  So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.

The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port.  In real setups the
number of recirculations can go up to 4 or 5, sometimes more.

There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet.  So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.

For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2.  Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets.  Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.

Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID.  This way the upcall triggered during the
execution will go to the same handler.  Packets will be enqueued to
the same socket and re-injected in the same order.  This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows to eliminate
races between multiple handlers.

The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.

Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-07-08 13:34:02 +02:00
+								    uint32_t pid;                  /* Socket PID this upcall was received from,
 								                                    * or zero. */
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    enum upcall_type type;         /* Type of the upcall. */
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted,  but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								    const struct nlattr *actions;  /* Flow actions in DPIF_UC_ACTION Upcalls. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
 								    bool xout_initialized;         /* True if 'xout' must be uninitialized. */
 								    struct xlate_out xout;         /* Result of xlate_actions(). */
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								    struct ofpbuf odp_actions;     /* Datapath actions from xlate_actions(). */
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								    struct flow_wildcards wc;      /* Dependencies that megaflow must match. */
-												ofproto-dpif-upcall: Fix typo in comment.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-07-22 16:38:18 -07:00
+								    struct ofpbuf put_actions;     /* Actions 'put' in the fastpath. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												ofproto-dpif-xlate: Do not take references in xlate_receive().

xlate configuration (via xcfg) is already holding the references, so
the callers of xlate_receive() can safely use the returned pointers
up to the point when they quiesce.  If the objects are needed for
longer term, the callers can take additional references themselves.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-08-22 09:01:36 -07:00
+								    struct dpif_ipfix *ipfix;      /* IPFIX pointer or NULL. */
 								    struct dpif_sflow *sflow;      /* SFlow pointer or NULL. */
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    struct udpif_key *ukey;        /* Revalidator flow cache. */
 								    bool ukey_persists;            /* Set true to keep 'ukey' beyond the
 								                                      lifetime of this upcall. */
 								    uint64_t reval_seq;            /* udpif->reval_seq at translation time. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    /* Not used by the upcall callback interface. */
 								    const struct nlattr *key;      /* Datapath flow key. */
 								    size_t key_len;                /* Datapath flow key length. */
-												Extend OVS IPFIX exporter to export tunnel headers

Extend the IPFIX exporter to export tunnel headers for both the input and
the output of a port.
Add three other_config options in IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
sampling tunnel info, on which direction (input or output).
Insert sampling action before output action and the output tunnel port
is sent to datapath in the sampling action.
Make datapath collect output tunnel info and send it back to userspace
in upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
+								    const struct nlattr *out_tun_key;  /* Datapath output tunnel key. */
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    struct user_action_cookie cookie;
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								    uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								};
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								/* Ukeys must transition through these states using transition_ukey(). */
 								enum ukey_state {
 								    UKEY_CREATED = 0,
 								    UKEY_VISIBLE,       /* Ukey is in umap, datapath flow install is queued. */
 								    UKEY_OPERATIONAL,   /* Ukey is in umap, datapath flow is installed. */
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								    UKEY_INCONSISTENT,  /* Ukey is in umap, datapath flow is inconsistent. */
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    UKEY_EVICTING,      /* Ukey is in umap, datapath flow delete is queued. */
 								    UKEY_EVICTED,       /* Ukey is in umap, datapath flow is deleted. */
 								    UKEY_DELETED,       /* Ukey removed from umap, ukey free is deferred. */
 								};
 								#define N_UKEY_STATES (UKEY_DELETED + 1)
-												utilities: Correct deletion reason in flow_reval_monitor.py.

The flow_reval_monitor.py script incorrectly reported the reasons for
FDR_PURGE and FDR_TOO_EXPENSIVE, as their descriptions were swapped.
This patch rectifies the order using a dictionary to avoid similar
problems in the future.

In addition this patch also syncs the delete reason output of the
script, with the comments in the code.

Fixes: 86b9e653ef22 ("revalidator: Add a USDT probe during flow deletion with purge reason.")
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-05-14 15:15:34 +02:00
+								/* Ukey delete reasons used by USDT probes.  Please keep in sync with the
 								 * definition in utilities/usdt-scripts/flow_reval_monitor.py.  */
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								enum flow_del_reason {
-												utilities: Correct deletion reason in flow_reval_monitor.py.

The flow_reval_monitor.py script incorrectly reported the reasons for
FDR_PURGE and FDR_TOO_EXPENSIVE, as their descriptions were swapped.
This patch rectifies the order using a dictionary to avoid similar
problems in the future.

In addition this patch also syncs the delete reason output of the
script, with the comments in the code.

Fixes: 86b9e653ef22 ("revalidator: Add a USDT probe during flow deletion with purge reason.")
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-05-14 15:15:34 +02:00
+								    FDR_NONE = 0,           /* No delete reason specified. */
 								    FDR_AVOID_CACHING,      /* Cache avoidance flag set. */
 								    FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
 								    FDR_FLOW_IDLE,          /* Flow idle timeout. */
 								    FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
 								    FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
 								    FDR_NO_OFPROTO,         /* Bridge not found. */
 								    FDR_PURGE,              /* User requested flow deletion. */
 								    FDR_TOO_EXPENSIVE,      /* Too expensive to revalidate. */
 								    FDR_UPDATE_FAIL,        /* Datapath update failed. */
 								    FDR_XLATION_ERROR,      /* Flow translation error. */
-												ofproto-dpif-upcall: Avoid stale ukeys leaks.

It is observed in some environments that there are much more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch tracks the number of consecutive missed dumps. If four dumps
are missed in a row, it is assumed that the datapath flow no longer
exists, and the ukey can be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-29 09:00:06 +02:00
+								    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								};
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								/* 'udpif_key's are responsible for tracking the little bit of state udpif
 								 * needs to do flow expiration which can't be pulled directly from the
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								 * datapath.  They may be created by any handler or revalidator thread at any
 								 * time, and read by any revalidator during the dump phase. They are however
 								 * each owned by a single revalidator which takes care of destroying them
 								 * during the garbage-collection phase.
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								 *
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								 * The mutex within the ukey protects some members of the ukey. The ukey
 								 * itself is protected by RCU and is held within a umap in the parent udpif.
 								 * Adding or removing a ukey from a umap is only safe when holding the
 								 * corresponding umap lock. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								struct udpif_key {
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								    struct cmap_node cmap_node;     /* In parent revalidator 'ukeys' map. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    /* These elements are read only once created, and therefore aren't
 								     * protected by a mutex. */
 								    const struct nlattr *key;      /* Datapath flow key. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    size_t key_len;                /* Length of 'key'. */
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    const struct nlattr *mask;     /* Datapath flow mask. */
 								    size_t mask_len;               /* Length of 'mask'. */
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    ovs_u128 ufid;                 /* Unique flow identifier. */
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								    bool ufid_present;             /* True if 'ufid' is in datapath. */
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								    uint32_t hash;                 /* Pre-computed hash for 'key'. */
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;               /* Datapath poll mode driver id. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    struct ovs_mutex mutex;                   /* Guards the following. */
 								    struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								    const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    long long int created OVS_GUARDED;        /* Estimate of creation time. */
-												revalidator: Replace ukey->mark with dump_seq.

Rather than setting and resetting the 'mark' field in the ukey, this
patch introduces a seq to track whether a flow has been seen during the
most recent dump. This tidies the code and simplifies the logic for
detecting when flows are duplicated from the datapath.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-14 16:17:25 +12:00
+								    uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
-												ofproto-dpif-upcall: Avoid stale ukeys leaks.

It is observed in some environments that there are many more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch tracks the number of consecutive missed dumps. If four dumps
are missed in a row, it is assumed that the datapath flow no longer
exists, and the ukey can be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-29 09:00:06 +02:00
+								    uint32_t missed_dumps OVS_GUARDED;        /* Missed consecutive dumps. */
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								    /* 'state' debug information. */
 								    unsigned int state_thread OVS_GUARDED;    /* Thread that transitions. */
 								    const char *state_where OVS_GUARDED;      /* transition_ukey() locator. */
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
+								    /* Datapath flow actions as nlattrs.  Protected by RCU.  Read with
 								     * ukey_get_actions(), and write with ukey_set_actions(). */
 								    OVSRCU_TYPE(struct ofpbuf *) actions;
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
 								    struct xlate_cache *xcache OVS_GUARDED;   /* Cache for xlate entries that
 								                                               * are affected by this ukey.
 								                                               * Used for stats and learning.*/
-												Fix strict aliasing violations with GCC 4.1 and 4.4.

The typical use of struct sockaddr_storage is flagged as a strict
aliasing violation by GCC 4.4.7.  Using an explicit union lets the
compiler know that accessing the same location via different types is
not an error.

GCC 4.1.2 had a similar complaint about a cast of ukey's key_buf to
nlattr.  After this patch there are no further warnings with the
XenServer build, so we could start treating warnings as errors in the
builds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-05 13:51:19 -07:00
+								    union {
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								        struct odputil_keybuf buf;
 								        struct nlattr nla;
 								    } keybuf, maskbuf;
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								    uint32_t key_recirc_id;   /* Non-zero if reference is held by the ukey. */
 								    struct recirc_refs recircs;  /* Action recirc IDs with references held. */
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
 								#define OFFL_REBAL_INTVL_MSEC  3000	/* dynamic offload rebalance freq */
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    struct netdev *in_netdev;		/* in_odp_port's netdev */
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								    bool offloaded;			/* True if flow is offloaded */
 								    uint64_t flow_pps_rate;		/* Packets-Per-Second rate */
 								    long long int flow_time;		/* last pps update time */
 								    uint64_t flow_packets;		/* #pkts seen in interval */
 								    uint64_t flow_backlog_packets;	/* prev-mode #pkts (offl or kernel) */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								};
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								/* Datapath operation with optional ukey attached.
 								 *
 								 * Groups one flow operation ('dop') with a stats buffer for it ('stats')
 								 * and the ukey it is attached to ('ukey'), when there is one. */
 								struct ukey_op {
 								    struct udpif_key *ukey;       /* Attached ukey, if any. */
 								    struct dpif_flow_stats stats; /* Stats for 'op'. */
 								    struct dpif_op dop;           /* Flow operation. */
 								};
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-												lib: Expose struct ovs_list definition in <openvswitch/list.h>

Expose the struct ovs_list definition in <openvswitch/list.h>. Keep the
list access API private for now.

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-15 14:10:38 +01:00
+								static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static size_t recv_upcalls(struct handler *);
 								static int process_upcall(struct udpif *, struct upcall *,
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								                          struct ofpbuf *odp_actions, struct flow_wildcards *);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								static void udpif_stop_threads(struct udpif *, bool delete_flows);
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								static void udpif_start_threads(struct udpif *, uint32_t n_handlers,
 								                                uint32_t n_revalidators);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								static void udpif_pause_revalidators(struct udpif *);
 								static void udpif_resume_revalidators(struct udpif *);
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								static void *udpif_upcall_handler(void *);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void *udpif_revalidator(void *);
-												ofproto-dpif-upcall: Use atomic_long in struct udpif

Some concern has been raised by Ben Pfaff that atomic_uint64_t may not
be portable. Accordingly, use atomic_ulong instead of atomic_uint64_t
in struct ofproto.

This is in preparation for removing atomic_uint64_t entirely.

Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-14 16:19:34 +09:00
+								static unsigned long udpif_get_n_flows(struct udpif *);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								static void revalidate(struct revalidator *);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								static void revalidator_pause(struct revalidator *);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void revalidator_sweep(struct revalidator *);
-												upcall: Remove datapath flows when setting n-threads.

Previously, we would delete all ukeys when changing the number of
threads, but leave all flows in the datapath. This would cause
double-counting of stats for any flows that remain in the datapath. This
patch fixes the issue by ensuring that all flows are deleted from the
datapath before changing the number of threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:36 -08:00
+								static void revalidator_purge(struct revalidator *);
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								static void upcall_unixctl_show(struct unixctl_conn *conn, int argc,
 								                                const char *argv[], void *aux);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void upcall_unixctl_disable_megaflows(struct unixctl_conn *, int argc,
 								                                             const char *argv[], void *aux);
 								static void upcall_unixctl_enable_megaflows(struct unixctl_conn *, int argc,
 								                                            const char *argv[], void *aux);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to dump only a 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								static void upcall_unixctl_disable_ufid(struct unixctl_conn *, int argc,
 								                                              const char *argv[], void *aux);
 								static void upcall_unixctl_enable_ufid(struct unixctl_conn *, int argc,
 								                                             const char *argv[], void *aux);
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
-												upcall: Add appctl call to set flow_limit.

This should assist testing of datapath performance, as it allows us to
skip "warming up" the flow limit value.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-06 09:49:19 -08:00
+								static void upcall_unixctl_set_flow_limit(struct unixctl_conn *conn, int argc,
 								                                            const char *argv[], void *aux);
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
+								static void upcall_unixctl_dump_wait(struct unixctl_conn *conn, int argc,
 								                                     const char *argv[], void *aux);
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
+								static void upcall_unixctl_purge(struct unixctl_conn *conn, int argc,
 								                                 const char *argv[], void *aux);
-												ofproto-dpif-upcall: Add debug commands to pause/resume revalidators.

New commands 'revalidator/pause' and 'revalidator/resume'.
Not documented, since these should not be used in production
environments.

Will be used for unit tests in the next commit.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-13 21:08:51 +02:00
+								static void upcall_unixctl_pause(struct unixctl_conn *conn, int argc,
 								                                 const char *argv[], void *aux);
 								static void upcall_unixctl_resume(struct unixctl_conn *conn, int argc,
 								                                  const char *argv[], void *aux);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								static void upcall_unixctl_ofproto_detrace(struct unixctl_conn *, int argc,
 								                                           const char *argv[], void *aux);
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								static struct udpif_key *ukey_create_from_upcall(struct upcall *,
 								                                                 struct flow_wildcards *);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to dump only a 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								static int ukey_create_from_dpif_flow(const struct udpif *,
 								                                      const struct dpif_flow *,
 								                                      struct udpif_key **);
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
+								static void ukey_get_actions(struct udpif_key *, const struct nlattr **actions,
 								                             size_t *size);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								static bool ukey_install__(struct udpif *, struct udpif_key *ukey)
 								    OVS_TRY_LOCK(true, ukey->mutex);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey);
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								static void transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
 								                               const char *where)
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    OVS_REQUIRES(ukey->mutex);
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								#define transition_ukey(UKEY, DST) \
 								    transition_ukey_at(UKEY, DST, OVS_SOURCE_LOCATOR)
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								static struct udpif_key *ukey_lookup(struct udpif *udpif,
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								                                     const ovs_u128 *ufid,
 								                                     const unsigned pmd_id);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								static int ukey_acquire(struct udpif *, const struct dpif_flow *,
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to dump only a 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                        struct udpif_key **result, int *error);
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								static void ukey_delete__(struct udpif_key *);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								static void ukey_delete(struct umap *, struct udpif_key *);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static enum upcall_type classify_upcall(enum dpif_upcall_type type,
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								                                        const struct nlattr *userdata,
 								                                        struct user_action_cookie *cookie);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												upcall: Reuse flow_put initializer.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:02 -07:00
+								static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
 								                        enum dpif_flow_put_flags flags);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
 								                           struct udpif_key *ukey);
-												upcall: Reuse flow_put initializer.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:02 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static int upcall_receive(struct upcall *, const struct dpif_backer *,
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf. By making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use cases.
This avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								                          const struct dp_packet *packet, enum dpif_upcall_type,
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								                          const struct nlattr *userdata, const struct flow *,
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								                          const unsigned int mru,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available
as if the recirculation id wasn't found or it was a packet from
unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                          const ovs_u128 *ufid, const unsigned pmd_id,
 								                          char **errorp);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static void upcall_uninit(struct upcall *);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								static void udpif_flow_rebalance(struct udpif *udpif);
 								static int udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
 								                              enum dpif_offload_type offload_type);
 								static int udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
 								                                enum dpif_offload_type offload_type);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								static upcall_callback upcall_cb;
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new call-back function
in dpif layer for notifying upper layer the purging of datapath
(e.g. pmd thread deletion in dpif-netdev).  So, the
ofproto-dpif-upcall module can react properly with deleting
the ukeys and with collecting flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								static dp_purge_callback dp_purge_cb;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
-												treewide: Remove uses of ATOMIC_VAR_INIT.

ATOMIC_VAR_INIT has a trivial definition
`#define ATOMIC_VAR_INIT(value) (value)`,
is deprecated in C17/C++20, and will be removed in newer standards in
newer GCC/Clang (e.g. https://reviews.llvm.org/D144196).

Signed-off-by: Fangrui Song <maskray@google.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-28 18:30:56 -08:00
+								static atomic_bool enable_megaflows = true;
 								static atomic_bool enable_ufid = true;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ovs-appctl: register unixctl commands in the beginning

Some commands of ovs-appctl were lazily registered when the first
bridge or bfd was added. Before that, calling these commands raised an
error ("xxx is not a valid command"). The affected commands included
"bfd/...", "upcall/...", "dpif/...", "fdb/..." and so on.

Fix this by moving the registration into "bridge_init" and
"bridge_init_ofproto". All commands are registered at the moment
ovs-vswitchd starts.

Signed-off-by: Huanle Han <hanxueluo@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 23:41:05 +08:00
+								void
 								udpif_init(void)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 								    if (ovsthread_once_start(&once)) {
 								        unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show,
 								                                 NULL);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        unixctl_command_register("upcall/disable-megaflows", "", 0, 0,
 								                                 upcall_unixctl_disable_megaflows, NULL);
 								        unixctl_command_register("upcall/enable-megaflows", "", 0, 0,
 								                                 upcall_unixctl_enable_megaflows, NULL);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        unixctl_command_register("upcall/disable-ufid", "", 0, 0,
 								                                 upcall_unixctl_disable_ufid, NULL);
 								        unixctl_command_register("upcall/enable-ufid", "", 0, 0,
 								                                 upcall_unixctl_enable_ufid, NULL);
-												ovs-appctl: Add usage content to "upcall/set-flow-limit" command.

Signed-off-by: nickcooper-zhangtonghao <nic@opencloud.tech>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-12-06 01:01:22 -08:00
+								        unixctl_command_register("upcall/set-flow-limit", "flow-limit-number",
 								                                 1, 1, upcall_unixctl_set_flow_limit, NULL);
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
+								        unixctl_command_register("revalidator/wait", "", 0, 0,
 								                                 upcall_unixctl_dump_wait, NULL);
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
+								        unixctl_command_register("revalidator/purge", "", 0, 0,
 								                                 upcall_unixctl_purge, NULL);
-												ofproto-dpif-upcall: Add debug commands to pause/resume revalidators.

New commands 'revalidator/pause' and 'revalidator/resume'.
Not documented, since these should not be used in production
environments.

Will be used for unit tests in the next commit.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-13 21:08:51 +02:00
+								        unixctl_command_register("revalidator/pause", NULL, 0, 0,
 								                                 upcall_unixctl_pause, NULL);
 								        unixctl_command_register("revalidator/resume", NULL, 0, 0,
 								                                 upcall_unixctl_resume, NULL);
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								        unixctl_command_register("ofproto/detrace", "UFID [pmd=PMD-ID]", 1, 2,
 								                                 upcall_unixctl_ofproto_detrace, NULL);
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								        ovsthread_once_done(&once);
 								    }
-												ovs-appctl: register unixctl commands in the beginning

Some commands of ovs-appctl were lazily registered when the first
bridge or bfd was added. Before that, calling these commands raised an
error ("xxx is not a valid command"). The affected commands included
"bfd/...", "upcall/...", "dpif/...", "fdb/..." and so on.

Fix this by moving the registration into "bridge_init" and
"bridge_init_ofproto". All commands are registered at the moment
ovs-vswitchd starts.

Signed-off-by: Huanle Han <hanxueluo@gmail.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-15 23:41:05 +08:00
+								}
 								struct udpif *
 								udpif_create(struct dpif_backer *backer, struct dpif *dpif)
 								{
 								    struct udpif *udpif = xzalloc(sizeof *udpif);
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    udpif->dpif = dpif;
 								    udpif->backer = backer;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    atomic_init(&udpif->flow_limit, MIN(ofproto_flow_limit, 10000));
-												ofproto: Replace reval_seq with a struct seq.

Future patches will need to poll_block() on it.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-17 14:35:53 -07:00
+								    udpif->reval_seq = seq_create();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    udpif->dump_seq = seq_create();
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    latch_init(&udpif->exit_latch);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using ovs barrier to allow main thread pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								    latch_init(&udpif->pause_latch);
-												list: Rename all functions in list.h with ovs_ prefix.

This attempts to prevent namespace collisions with other list libraries

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:22 -07:00
+								    ovs_list_push_back(&all_udpifs, &udpif->list_node);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    atomic_init(&udpif->enable_ufid, false);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    atomic_init(&udpif->n_flows, 0);
 								    atomic_init(&udpif->n_flows_timestamp, LLONG_MIN);
 								    ovs_mutex_init(&udpif->n_flows_mutex);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    udpif->ukeys = xmalloc(N_UMAPS * sizeof *udpif->ukeys);
 								    for (int i = 0; i < N_UMAPS; i++) {
 								        cmap_init(&udpif->ukeys[i].cmap);
 								        ovs_mutex_init(&udpif->ukeys[i].mutex);
 								    }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    dpif_register_upcall_cb(dpif, upcall_cb, udpif);
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new callback function
in the dpif layer for notifying the upper layer of the purging of the
datapath (e.g. pmd thread deletion in dpif-netdev).  This way, the
ofproto-dpif-upcall module can react properly by deleting
the ukeys and collecting the flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								    dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    return udpif;
 								}
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
+								void
 								udpif_run(struct udpif *udpif)
 								{
 								    /* Answers the unixctl connections parked in 'udpif->conns' once the
 								     * value read from 'dump_seq' no longer matches the recorded
 								     * 'conn_seq', then drops the list.  Does nothing when no connections
 								     * are stored or the sequence value is unchanged.
 								     *
 								     * NOTE(review): presumably these are clients waiting for a
 								     * revalidation pass to finish -- confirm against the code that
 								     * fills 'conns' and sets 'conn_seq'. */
 								    if (udpif->conns && udpif->conn_seq != seq_read(udpif->dump_seq)) {
 								        int i;
 								        for (i = 0; i < udpif->n_conns; i++) {
 								            /* A NULL body is passed as the reply. */
 								            unixctl_command_reply(udpif->conns[i], NULL);
 								        }
 								        /* Release the array and reset the bookkeeping so the next call
 								         * fails the 'udpif->conns' check above and is a no-op. */
 								        free(udpif->conns);
 								        udpif->conns = NULL;
 								        udpif->n_conns = 0;
 								    }
 								}
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								void
 								udpif_destroy(struct udpif *udpif)
 								{
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								    udpif_stop_threads(udpif, false);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												upcall: Unregister dpif cbs in udpif_destroy().

During udpif_create(), we register callbacks for handling upcalls and
purging the datapath; however, the corresponding udpif_destroy() never
unregistered them. This could potentially lead to a dereference of
uninitialized memory in the userspace datapath if the main thread
destroys the udpif and then executes an OpenFlow packet-out.

Fixes: e4e74c3a2b9a ("dpif-netdev: Purge all ukeys when reconfigure pmd.")
Fixes: 623540e4617e ("dpif-netdev: Streamline miss handling.")
Reported-by: William Tu <u9012063@gmail.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-16 20:08:01 -07:00
+								    dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif);
 								    dpif_register_upcall_cb(udpif->dpif, NULL, udpif);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    for (int i = 0; i < N_UMAPS; i++) {
-												ofproto-dpif-upcall: Fix ukey leak on udpif destroy.

Since commit 79eadafeb1b4 udpif_stop_threads() doesn't delete datapath
flows while called from udpif_destroy().  This means that ukeys are
not cleaned up either.  So, hash maps in udpif->ukeys[] might still
contain valid pointers to ukeys that should be destroyed before
destroying the hash map itself:

  ==2783089==ERROR: LeakSanitizer: detected memory leaks

  Direct leak of 1560 byte(s) in 1 object(s) allocated from:
    # 0 0x7f8a57eae667 in __interceptor_malloc (/lib64/libasan.so.6+0xb0667)
    # 1 0x8411f6 in xmalloc lib/util.c:138
    # 2 0x4d8a52 in ukey_create__ ofproto/ofproto-dpif-upcall.c:1682
    # 3 0x4d99e3 in ukey_create_from_upcall ofproto/ofproto-dpif-upcall.c:1751
    # 4 0x4d517d in upcall_xlate ofproto/ofproto-dpif-upcall.c:1242
    # 5 0x4d63d2 in process_upcall ofproto/ofproto-dpif-upcall.c:1414
    # 6 0x4d29f3 in recv_upcalls ofproto/ofproto-dpif-upcall.c:833
    # 7 0x4d1ee1 in udpif_upcall_handler ofproto/ofproto-dpif-upcall.c:750
    # 8 0x795aa2 in ovsthread_wrapper lib/ovs-thread.c:383
    # 9 0x7f8a57a59431 in start_thread (/lib64/libpthread.so.0+0x9431)

Fixes: 79eadafeb1b4 ("ofproto: Do not delete datapath flows on exit by default.")
Reported-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-01-18 17:12:23 +01:00
+								        struct udpif_key *ukey;
 								        CMAP_FOR_EACH (ukey, cmap_node, &udpif->ukeys[i].cmap) {
 								            ukey_delete__(ukey);
 								        }
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        cmap_destroy(&udpif->ukeys[i].cmap);
 								        ovs_mutex_destroy(&udpif->ukeys[i].mutex);
 								    }
 								    free(udpif->ukeys);
 								    udpif->ukeys = NULL;
-												list: Rename all functions in list.h with ovs_ prefix.

This attempts to prevent namespace collisions with other list libraries

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:22 -07:00
+								    ovs_list_remove(&udpif->list_node);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    latch_destroy(&udpif->exit_latch);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using ovs barrier to allow main thread pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								    latch_destroy(&udpif->pause_latch);
-												ofproto: Replace reval_seq with a struct seq.

Future patches will need to poll_block() on it.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-17 14:35:53 -07:00
+								    seq_destroy(udpif->reval_seq);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    seq_destroy(udpif->dump_seq);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    ovs_mutex_destroy(&udpif->n_flows_mutex);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    free(udpif);
 								}
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								/* Stops the handler and revalidator threads.
 								 *
 								 * If 'delete_flows' is true, we delete ukeys and delete all flows from the
 								 * datapath, because we would otherwise end up double-counting stats for flows
 								 * that remain in the datapath.  If 'delete_flows' is false, we skip this
 								 * step, which is appropriate if OVS is about to exit anyway and it is
 								 * desirable to let existing network connections continue being forwarded
 								 * afterward. */
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
the 0-valued 'n_handlers'.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								static void
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								udpif_stop_threads(struct udpif *udpif, bool delete_flows)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
-												ofproto-dpif-upcall: Fix logic error in handler/revalidator threads
creation and deletion.

Commit 1f8675481e (ofproto-dpif-upcall: Fix ovs-vswitchd crash.)
directly copied the udpif_set_threads() logic to udpif_stop_threads()
and udpif_start_threads().  In fact, this was erroneous and caused
unittest failures.

This commit fixes the above issue by correcting the checks in
udpif_stop_threads() and udpif_start_threads(), and adding necessary
checks in udpif_set_threads().

Acked-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-21 20:05:08 -07:00
+								    if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        size_t i;
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates over and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								        /* Tell the threads to exit. */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        latch_set(&udpif->exit_latch);
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates over and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								        /* Wait for the threads to exit.  Quiesce because this can take a long
 								         * time. */
 								        ovsrcu_quiesce_start();
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        for (i = 0; i < udpif->n_handlers; i++) {
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates over and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								            xpthread_join(udpif->handlers[i].thread, NULL);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
 								        for (i = 0; i < udpif->n_revalidators; i++) {
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            xpthread_join(udpif->revalidators[i].thread, NULL);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        }
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								        dpif_disable_upcall(udpif->dpif);
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates over and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								        ovsrcu_quiesce_end();
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								        if (delete_flows) {
 								            for (i = 0; i < udpif->n_revalidators; i++) {
 								                revalidator_purge(&udpif->revalidators[i]);
 								            }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        latch_poll(&udpif->exit_latch);
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator thread uses pthread_barrier_* functions in their
main loop to synchronize with leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since last call as poll interval
and issue the following warnings:

"Unreasonably long XXXXms poll interval".

To prevent it, this commit implements the barrier struct and operations
for OVS which allow thread to block on barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								        ovs_barrier_destroy(&udpif->reval_barrier);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using ovs barrier to allow main thread pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								        ovs_barrier_destroy(&udpif->pause_barrier);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        free(udpif->revalidators);
 								        udpif->revalidators = NULL;
 								        udpif->n_revalidators = 0;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        free(udpif->handlers);
 								        udpif->handlers = NULL;
 								        udpif->n_handlers = 0;
 								    }
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
the 0-valued 'n_handlers'.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								}
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates over and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								/* Starts the handler and revalidator threads. */
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
the 0-valued 'n_handlers'.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								static void
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								udpif_start_threads(struct udpif *udpif, uint32_t n_handlers_,
 								                    uint32_t n_revalidators_)
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, a caller of udpif_set_threads() can pass a value of 0
for n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
'n_handlers' set to 0.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								{
-												dpif-netdev: Do not create handler threads.

Avoid unnecessary thread creation as no upcalls are generated,
resulting in idle threads waiting for process termination.

This optimization significantly reduces memory usage, cutting it
by half on a 128 CPU/thread system during testing, with the number
of threads reduced from 95 to 0.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-02-20 10:31:34 +01:00
+								    if (udpif && n_revalidators_) {
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								        /* Creating a thread can take a significant amount of time on some
 								         * systems, even hundred of milliseconds, so quiesce around it. */
 								        ovsrcu_quiesce_start();
-												Don't shadow variables.

Rename the remaining variables that were shadowing another definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 17:34:14 -08:00
+								        udpif->n_handlers = n_handlers_;
 								        udpif->n_revalidators = n_revalidators_;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												dpif-netdev: Do not create handler threads.

Avoid unnecessary thread creation as no upcalls are generated,
resulting in idle threads waiting for process termination.

This optimization significantly reduces memory usage, cutting it
by half on a 128 CPU/thread system during testing, with the number
of threads reduced from 95 to 0.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-02-20 10:31:34 +01:00
+								        if (udpif->n_handlers) {
 								            udpif->handlers = xzalloc(udpif->n_handlers
 								                                      * sizeof *udpif->handlers);
 								            for (size_t i = 0; i < udpif->n_handlers; i++) {
 								                struct handler *handler = &udpif->handlers[i];
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												dpif-netdev: Do not create handler threads.

Avoid unnecessary thread creation as no upcalls are generated,
resulting in idle threads waiting for process termination.

This optimization significantly reduces memory usage, cutting it
by half on a 128 CPU/thread system during testing, with the number
of threads reduced from 95 to 0.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-02-20 10:31:34 +01:00
+								                handler->udpif = udpif;
 								                handler->handler_id = i;
 								                handler->thread = ovs_thread_create(
 								                    "handler", udpif_upcall_handler, handler);
 								            }
 								        } else {
 								            udpif->handlers = NULL;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        }
-												Don't shadow variables.

Rename the remaining variables that were shadowing another definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 17:34:14 -08:00
+								        atomic_init(&udpif->enable_ufid, udpif->backer->rt_support.ufid);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								        dpif_enable_upcall(udpif->dpif);
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator threads use pthread_barrier_* functions in their
main loop to synchronize with the leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since the last call as the poll
interval and issue the following warning:

"Unreasonably long XXXXms poll interval".

To prevent this, this commit implements a barrier struct and operations
for OVS which allow a thread to block on a barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								        ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								        ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        udpif->reval_exit = false;
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								        udpif->pause = false;
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								        udpif->offload_rebalance_time = time_msec();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        udpif->revalidators = xzalloc(udpif->n_revalidators
 								                                      * sizeof *udpif->revalidators);
-												Don't shadow variables.

Rename the remaining variables that were shadowing another definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 17:34:14 -08:00
+								        for (size_t i = 0; i < udpif->n_revalidators; i++) {
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								            struct revalidator *revalidator = &udpif->revalidators[i];
 								            revalidator->udpif = udpif;
-												ovs-thread: Make caller provide thread name when creating a thread.

Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one.  This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible.  This also simplifies code, since two function calls become
only one.

This makes a few other changes to the thread creation function:

    * Since it is no longer a direct wrapper around a pthread function,
      rename it to avoid giving that impression.

    * Remove 'pthread_attr_t *' param that every caller supplied as NULL.

    * Change 'pthread *' parameter into a return value, for convenience.

The system-stats code hadn't set a thread name, so this fixes that issue.

This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself.  Setting the thread
name earlier, as in this patch, avoids the problem.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-25 17:46:21 -07:00
+								            revalidator->thread = ovs_thread_create(
 								                "revalidator", udpif_revalidator, revalidator);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												ofproto-dpif-upcall: Don't purge ukeys while in a quiescent state.

revalidator_purge() iterates and modifies umap->cmap. This should
not happen in a quiescent state, because the cmap implementation is based
on RCU-protected variables. Let's narrow the quiescent period
to avoid possible invalid memory accesses.

CC: Joe Stringer <joe@ovn.org>
Fixes: 9fce0584a643 ("revalidator: Use 'cmap' for storing ukeys.")
Reported-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-02 11:25:45 -07:00
+								        ovsrcu_quiesce_end();
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    }
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, a caller of udpif_set_threads() can pass a value of 0
for n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
'n_handlers' set to 0.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								}
-												ovs-rcu: New library.

RCU allows multiple threads to read objects in parallel without any
performance penalty.  The following commit will introduce the first use.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-03-18 16:34:28 -07:00
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								/* Pauses all revalidators.  Should only be called by the main thread.
 								 * When function returns, all revalidators are paused and will proceed
 								 * only after udpif_resume_revalidators() is called. */
 								static void
 								udpif_pause_revalidators(struct udpif *udpif)
 								{
-												ofproto-dpif: Unhide structure contents.

Until now, ofproto-dpif.c has hidden the definitions of several structures,
such as struct ofproto_dpif and struct rule_dpif.  This kind of information
hiding is often beneficial, because it forces code outside the file with
the definition to use the documented interfaces.  However, in this case it
was starting to burden ofproto-dpif with an increasing number of trivial
helpers that were not improving or maintaining a useful abstraction and
that were making code harder to maintain and read.

Information hiding also made it hard to move blocks of code outside
ofproto-dpif.c itself, since any code moved out often needed new helpers if
it used anything that wasn't previously exposed.  In the present instance,
upcoming patches will move code for tracing outside ofproto-dpif, and this
would require adding several helpers that would just obscure the function
of the code otherwise needlessly.

In balance, it seems that there is more harm than good in the information
hiding here, so this commit moves the definitions of several structures
from ofproto-dpif.c into ofproto-dpif.h.  It also removes all of the
trivial helpers that had accumulated, instead changing their users to
directly access the members that they needed.  It also reorganizes
ofproto-dpif.h, grouping structure definitions and function prototypes in a
sensible way.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2016-12-06 14:08:42 -08:00
+								    if (udpif->backer->recv_set_enable) {
-												ofproto-dpif: Do not block on uninitialized pause barriers.

e4e74c3a "dpif-netdev: Purge all ukeys when reconfigure pmd." introduced a new
dp_purge_cb function, which calls udpif_pause_revalidators() and that tries to
block on pause_barrier.
But if OVS was started with flow-restore-wait="true" (e.g. through ovs-ctl),
type_run() will have backer->recv_set_enable == false, and udpif_set_threads
won't initialize the barrier, which leads to a segfault like this:

This patch introduces ofproto_dpif_backer_enabled(), which checks
recv_set_enable before touching the latch and blocking on pause_barrier.

Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-09-25 11:42:40 -07:00
+								        latch_set(&udpif->pause_latch);
 								        ovs_barrier_block(&udpif->pause_barrier);
 								    }
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								}
 								/* Resumes the pausing of revalidators.  Should only be called by the
 								 * main thread. */
 								static void
 								udpif_resume_revalidators(struct udpif *udpif)
 								{
-												ofproto-dpif: Unhide structure contents.

Until now, ofproto-dpif.c has hidden the definitions of several structures,
such as struct ofproto_dpif and struct rule_dpif.  This kind of information
hiding is often beneficial, because it forces code outside the file with
the definition to use the documented interfaces.  However, in this case it
was starting to burden ofproto-dpif with an increasing number of trivial
helpers that were not improving or maintaining a useful abstraction and
that were making code harder to maintain and read.

Information hiding also made it hard to move blocks of code outside
ofproto-dpif.c itself, since any code moved out often needed new helpers if
it used anything that wasn't previously exposed.  In the present instance,
upcoming patches will move code for tracing outside ofproto-dpif, and this
would require adding several helpers that would just obscure the function
of the code otherwise needlessly.

In balance, it seems that there is more harm than good in the information
hiding here, so this commit moves the definitions of several structures
from ofproto-dpif.c into ofproto-dpif.h.  It also removes all of the
trivial helpers that had accumulated, instead changing their users to
directly access the members that they needed.  It also reorganizes
ofproto-dpif.h, grouping structure definitions and function prototypes in a
sensible way.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2016-12-06 14:08:42 -08:00
+								    if (udpif->backer->recv_set_enable) {
-												ofproto-dpif: Do not block on uninitialized pause barriers.

e4e74c3a "dpif-netdev: Purge all ukeys when reconfigure pmd." introduced a new
dp_purge_cb function, which calls udpif_pause_revalidators() and that tries to
block on pause_barrier.
But if OVS was started with flow-restore-wait="true" (e.g. through ovs-ctl),
type_run() will have backer->recv_set_enable == false, and udpif_set_threads
won't initialize the barrier, which leads to a segfault like this:

This patch introduces ofproto_dpif_backer_enabled(), which checks
recv_set_enable before touching the latch and blocking on pause_barrier.

Signed-off-by: Zoltan Kiss <zoltan.kiss@linaro.org>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-09-25 11:42:40 -07:00
+								        latch_poll(&udpif->pause_latch);
 								        ovs_barrier_block(&udpif->pause_barrier);
 								    }
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an OVS barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								}
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, a caller of udpif_set_threads() can pass a value of 0
for n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
'n_handlers' set to 0.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								/* Tells 'udpif' how many threads it should use to handle upcalls.
-												Don't shadow variables.

Rename the remaining variables that were shadowing another definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 17:34:14 -08:00
+								 * 'n_handlers_' and 'n_revalidators_' can never be zero.  'udpif''s
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, a caller of udpif_set_threads() can pass a value of 0
for n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls dpif_handlers_set() with
'n_handlers' set to 0.  Since the dpif level always assumes 'n_handlers'
to be non-zero, this causes warnings and even a crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								 * datapath handle must have packet reception enabled before starting
 								 * threads. */
 								void
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								udpif_set_threads(struct udpif *udpif, uint32_t n_handlers_,
 								                  uint32_t n_revalidators_)
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls the dpif_handlers_set() with
the 0 value 'n_handlers'.  Since dpif level always assume the 'n_handlers'
be non-zero, this causes warnings and even crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
+								{
-												ofproto-dpif-upcall: Fix logic error in handler/revalidator threads
creation and deletion.

Commit 1f8675481e (ofproto-dpif-upcall: Fix ovs-vswitchd crash.)
directly copied the udpif_set_threads() logic to udpif_stop_threads()
and udpif_start_threads().  In fact, this was erroneous and caused
unittest failures.

This commit fixes the above issue by correcting the checks in
udpif_stop_threads() and udpif_start_threads(), and adding necessary
checks in udpif_set_threads().

Acked-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-21 20:05:08 -07:00
+								    ovs_assert(udpif);
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								    uint32_t n_handlers_requested;
 								    uint32_t n_revalidators_requested;
 								    bool forced = false;
 								    if (dpif_number_handlers_required(udpif->dpif, &n_handlers_requested)) {
 								        forced = true;
 								        if (!n_revalidators_) {
-												dpif-netdev: Do not create handler threads.

Avoid unnecessary thread creation as no upcalls are generated,
resulting in idle threads waiting for process termination.

This optimization significantly reduces memory usage, cutting it
by half on a 128 CPU/thread system during testing, with the number
of threads reduced from 95 to 0.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-02-20 10:31:34 +01:00
+								            n_revalidators_requested = (n_handlers_requested
 								                                        ? n_handlers_requested
 								                                        : MAX(count_cpu_cores(), 2)) / 4 + 1;
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								        } else {
 								            n_revalidators_requested = n_revalidators_;
 								        }
 								    } else {
 								        int threads = MAX(count_cpu_cores(), 2);
 								        n_revalidators_requested = MAX(n_revalidators_, 0);
 								        n_handlers_requested = MAX(n_handlers_, 0);
 								        if (!n_revalidators_requested) {
 								            n_revalidators_requested = n_handlers_requested
 								                    ? MAX(threads - (int) n_handlers_requested, 1)
 								                    : threads / 4 + 1;
 								        }
 								        if (!n_handlers_requested) {
 								            n_handlers_requested = MAX(threads -
 								                                       (int) n_revalidators_requested, 1);
 								        }
 								    }
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls the dpif_handlers_set() with
the 0 value 'n_handlers'.  Since dpif level always assume the 'n_handlers'
be non-zero, this causes warnings and even crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								    if (udpif->n_handlers != n_handlers_requested
 								        || udpif->n_revalidators != n_revalidators_requested) {
 								        if (forced) {
 								            VLOG_INFO("Overriding n-handler-threads to %u, setting "
 								                      "n-revalidator-threads to %u", n_handlers_requested,
 								                      n_revalidators_requested);
 								        } else {
 								            VLOG_INFO("Setting n-handler-threads to %u, setting "
 								                      "n-revalidator-threads to %u", n_handlers_requested,
 								                      n_revalidators_requested);
 								        }
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								        udpif_stop_threads(udpif, true);
-												ofproto-dpif-upcall: Fix logic error in handler/revalidator threads
creation and deletion.

Commit 1f8675481e (ofproto-dpif-upcall: Fix ovs-vswitchd crash.)
directly copied the udpif_set_threads() logic to udpif_stop_threads()
and udpif_start_threads().  In fact, this was erroneous and caused
unittest failures.

This commit fixes the above issue by correcting the checks in
udpif_stop_threads() and udpif_start_threads(), and adding necessary
checks in udpif_set_threads().

Acked-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-21 20:05:08 -07:00
+								    }
-												ofproto-dpif-upcall: Fix ovs-vswitchd crash.

On current master, caller of udpif_set_threads() can pass 0 value
on n_handlers and n_revalidators to delete all handler and revalidator
threads.

After commit 9a159f748866 (ofproto-dpif-upcall: Remove the dispatcher
thread.), udpif_set_threads() also calls the dpif_handlers_set() with
the 0 value 'n_handlers'.  Since dpif level always assume the 'n_handlers'
be non-zero, this causes warnings and even crash of ovs-vswitchd.

This commit fixes the above issue by defining separate functions for
starting and stopping handler and revalidator threads.  So
udpif_set_threads() will never be called with 0 value arguments.

Reported-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Co-authored-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-04-21 17:31:11 -07:00
-												ofproto-dpif-upcall: Fix logic error in handler/revalidator threads
creation and deletion.

Commit 1f8675481e (ofproto-dpif-upcall: Fix ovs-vswitchd crash.)
directly copied the udpif_set_threads() logic to udpif_stop_threads()
and udpif_start_threads().  In fact, this was erroneous and caused
unittest failures.

This commit fixes the above issue by correcting the checks in
udpif_stop_threads() and udpif_start_threads(), and adding necessary
checks in udpif_set_threads().

Acked-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-21 20:05:08 -07:00
+								    if (!udpif->handlers && !udpif->revalidators) {
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								        VLOG_INFO("Starting %u threads", n_handlers_requested +
 								                                         n_revalidators_requested);
-												ofproto-dpif-upcall: Do not call dpif_handlers_set() when there is no
handler/revalidator configuration change.

dpif_handlers_set(), which dumps all vport from datapath, should not be
called constantly.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-09 14:42:30 -07:00
+								        int error;
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								        error = dpif_handlers_set(udpif->dpif, n_handlers_requested);
-												ofproto-dpif-upcall: Do not call dpif_handlers_set() when there is no
handler/revalidator configuration change.

dpif_handlers_set(), which dumps all vport from datapath, should not be
called constantly.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-09 14:42:30 -07:00
+								        if (error) {
 								            VLOG_ERR("failed to configure handlers in dpif %s: %s",
 								                     dpif_name(udpif->dpif), ovs_strerror(error));
 								            return;
 								        }
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								        udpif_start_threads(udpif, n_handlers_requested,
 								                            n_revalidators_requested);
-												ofproto-dpif-upcall: Fix logic error in handler/revalidator threads
creation and deletion.

Commit 1f8675481e (ofproto-dpif-upcall: Fix ovs-vswitchd crash.)
directly copied the udpif_set_threads() logic to udpif_stop_threads()
and udpif_start_threads().  In fact, this was erroneous and caused
unittest failures.

This commit fixes the above issue by correcting the checks in
udpif_stop_threads() and udpif_start_threads(), and adding necessary
checks in udpif_set_threads().

Acked-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-21 20:05:08 -07:00
+								    }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								}
 								/* Notifies 'udpif' that something changed which may render previous
 								 * xlate_actions() results invalid. */
 								void
 								udpif_revalidate(struct udpif *udpif)
 								{
-												ofproto: Replace reval_seq with a struct seq.

Future patches will need to poll_block() on it.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-17 14:35:53 -07:00
+								    seq_change(udpif->reval_seq);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												guarded-list: New data structure for thread-safe queue.

We already had queues that were suitable for replacement by this data
structure, and I intend to add another one later on.

flow_miss_batch_ofproto_destroyed() did not work well with the guarded-list
structure (it required either adding a lot more functions or breaking the
abstraction) so I changed the caller to just use udpif_revalidate().

Checking reval_seq at the end of handle_miss_upcalls() also didn't work
well with the abstraction, so I decided that since this was a corner case
anyway it would be acceptable to just drop those in flow_miss_batch_next().

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-09-12 17:42:23 -07:00
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								/* Returns a seq which increments every time 'udpif' pulls stats from the
 								 * datapath.  Callers can use this to get a sense of when might be a good time
 								 * to do periodic work which relies on relatively up to date statistics. */
 								struct seq *
 								udpif_dump_seq(struct udpif *udpif)
 								{
 								    return udpif->dump_seq;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								}
-												ofproto-dpif-upcall: Add memory usage stats.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 17:41:02 -08:00
+								void
 								udpif_get_memory_usage(struct udpif *udpif, struct simap *usage)
 								{
 								    size_t i;
 								    simap_increase(usage, "handlers", udpif->n_handlers);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
 								    simap_increase(usage, "revalidators", udpif->n_revalidators);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    for (i = 0; i < N_UMAPS; i++) {
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								        simap_increase(usage, "udpif keys", cmap_count(&udpif->ukeys[i].cmap));
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    }
-												ofproto-dpif-upcall: Add memory usage stats.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 17:41:02 -08:00
+								}
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue. However, for every flow_del message sent to the datapath,
a log message, at the warning level, will be generated in case the datapath
failed to execute the command. In addition to causing spurious log
messages, double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush flows of all datapaths, while
most of its invocations should only flush the local datapath.
Only megaflow on/off commands should flush all datapaths. This bug is
also fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								/* Remove flows from a single datapath. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								void
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue. However, for every flow_del message sent to the datapath,
a log message, at the warning level, will be generated in case the datapath
failed to execute the command. In addition to causing spurious log
messages, double deletion causes unit tests to report erroneous
failures as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush flows of all datapaths, while
most of its invocations should only flush the local datapath.
Only megaflow on/off commands should flush all datapaths. This bug is
also fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								udpif_flush(struct udpif *udpif)
 								{
-												ofproto: Change type of n_handlers and n_revalidators.

'n_handlers' and 'n_revalidators' are declared as type 'size_t'.
However, dpif_handlers_set() requires parameter 'n_handlers' as
type 'uint32_t'. This patch fixes this type mismatch.

Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:34 -04:00
+								    uint32_t n_handlers_ = udpif->n_handlers;
 								    uint32_t n_revalidators_ = udpif->n_revalidators;
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue.  However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in case
the datapath failed to execute the command.  In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.  Only the
megaflow on/off commands should flush all datapaths.  This bug is also
fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
-												ofproto: Do not delete datapath flows on exit by default.

Commit e96a5c24e853 ("upcall: Remove datapath flows when setting
n-threads.") caused OVS to delete datapath flows when it exits through
any graceful means.  This is not necessarily desirable, especially when
OVS is being stopped as part of an upgrade.  This commit changes OVS so
that it only removes datapath flows when requested, via "ovs-appctl
exit --cleanup".

Acked-by: Numan Siddique <numans@ovn.org>
Tested-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-01-09 12:49:44 -08:00
+								    udpif_stop_threads(udpif, true);
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue.  However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in case
the datapath failed to execute the command.  In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.  Only the
megaflow on/off commands should flush all datapaths.  This bug is also
fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								    dpif_flow_flush(udpif->dpif);
-												Don't shadow variables.

Rename the remaining variables that were shadowing another definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 17:34:14 -08:00
+								    udpif_start_threads(udpif, n_handlers_, n_revalidators_);
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue.  However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in case
the datapath failed to execute the command.  In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.  Only the
megaflow on/off commands should flush all datapaths.  This bug is also
fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								}
 								/* Removes all flows from all datapaths. */
 								static void
 								udpif_flush_all_datapaths(void)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
 								    struct udpif *udpif;
 								    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue.  However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in case
the datapath failed to execute the command.  In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.  Only the
megaflow on/off commands should flush all datapaths.  This bug is also
fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								        udpif_flush(udpif);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    }
 								}
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem, per se, though it may be an
efficiency issue.  However, for every flow_del message sent to the
datapath, a log message, at the warning level, will be generated in case
the datapath failed to execute the command.  In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.  Only the
megaflow on/off commands should flush all datapaths.  This bug is also
fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								static bool
 								udpif_use_ufid(struct udpif *udpif)
 								{
 								    bool enable;
 								    atomic_read_relaxed(&enable_ufid, &enable);
-												ofproto-dpif: Add boottime support field.

When changing support fields, it may be unsafe to set support level
beyond what datapath can support.

This patch introduces the notion of boot time support and
runtime support fields.  Boot time support fields are set only
once during the ofproto startup phase, and are not changed during
runtime.  The runtime support fields are the same as the boot time
support fields at startup time, but can be changed via
the 'ovs-appctl' command.  However, each change will
be checked against the corresponding boot time support field.  Only
feature reduction from the boot time support is allowed.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-16 23:39:31 -07:00
+								    return enable && udpif->backer->rt_support.ufid;
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Use atomic_long in struct udpif

Some concern has been raised by Ben Pfaff that atomic_uint64_t may not
be portable. Accordingly, use atomic_ulong instead of atomic_uint64_t
in struct ofproto.

This is in preparation for removing atomic_uint64_t entirely.

Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-14 16:19:34 +09:00
+								static unsigned long
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								udpif_get_n_flows(struct udpif *udpif)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    long long int time, now;
-												ofproto-dpif-upcall: Use atomic_long in struct udpif

Some concern has been raised by Ben Pfaff that atomic_uint64_t may not
be portable. Accordingly, use atomic_ulong instead of atomic_uint64_t
in struct ofproto.

This is in preparation for removing atomic_uint64_t entirely.

Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-14 16:19:34 +09:00
+								    unsigned long flow_count;
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
 								    now = time_msec();
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_read_relaxed(&udpif->n_flows_timestamp, &time);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    if (time < now - 100 && !ovs_mutex_trylock(&udpif->n_flows_mutex)) {
 								        struct dpif_dp_stats stats;
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								        atomic_store_relaxed(&udpif->n_flows_timestamp, now);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								        dpif_get_dp_stats(udpif->dpif, &stats);
 								        flow_count = stats.n_flows;
-												ofproto-dpif-upcall: Include hardware offloaded flows in total flows.

The revalidator process uses the internal call udpif_get_n_flows()
to get the total number of flows installed in the system.  It uses
this value for various decisions on flow installation and removal.
With the tc offload this value is incorrect, as the hardware
offloaded flows are not included.  With rte_flow offload this is not a
problem, as dpif-netdev keeps both in sync.

This patch will include the hardware offloaded flows if the
underlying dpif implementation is not syncing them.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:30:11 +01:00
 								        if (!dpif_synced_dp_layers(udpif->dpif)) {
 								            /* If the dpif layer does not sync the flows, we need to include
 								             * the hardware offloaded flows separately. */
 								            uint64_t hw_flows;
 								            if (!dpif_get_n_offloaded_flows(udpif->dpif, &hw_flows)) {
 								                flow_count += hw_flows;
 								            }
 								        }
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								        atomic_store_relaxed(&udpif->n_flows, flow_count);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								        ovs_mutex_unlock(&udpif->n_flows_mutex);
 								    } else {
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								        atomic_read_relaxed(&udpif->n_flows, &flow_count);
-												upcall: Cache the number of flows from the datapath.

Fetching the number of flows in the datapath has been causing
unnecessary contention on the kernel ovs_lock in recent TCP CRR tests.
This patch caches this number for up to 100ms in the userspace to reduce
such kernel calls.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Co-authored-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-01-22 06:50:49 +00:00
+								    }
 								    return flow_count;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								/* The upcall handler thread tries to read a batch of UPCALL_MAX_BATCH
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								 * upcalls from dpif, processes the batch and installs corresponding flows
 								 * in dpif. */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								static void *
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
+								udpif_upcall_handler(void *arg)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
 								    struct handler *handler = arg;
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								    struct udpif *udpif = handler->udpif;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ofproto-dpif-upcall: Slightly simplify udpif_upcall_handler().

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-27 16:29:24 -08:00
+								    while (!latch_is_set(&handler->udpif->exit_latch)) {
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup.  While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by
reducing the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        if (recv_upcalls(handler)) {
 								            poll_immediate_wake();
 								        } else {
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								            dpif_recv_wait(udpif->dpif, handler->handler_id);
 								            latch_wait(&udpif->exit_latch);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        }
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup.  While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by
reducing the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        poll_block();
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    }
-												ofproto-dpif-upcall: Slightly simplify udpif_upcall_handler().

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-12-27 16:29:24 -08:00
 								    return NULL;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static size_t
 								recv_upcalls(struct handler *handler)
 								{
 								    struct udpif *udpif = handler->udpif;
 								    uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8];
 								    struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that uses a variable defined on
the stack inside the while loop for reading dpif upcalls and keeps
references to attributes of that variable within the same function
after the stack is cleared.  This bug can cause OVS to abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								    struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    struct upcall upcalls[UPCALL_MAX_BATCH];
-												ofproto-dpif-upcall: Fix out-of-scope use of stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that keeps a reference to a 'struct
flow' defined on the stack inside the while loop after it goes out
of scope.  This causes strange bugs such as wrong mask extraction
when that part of memory is corrupted, and could lead to even
more serious bugs or crashes.

This commit fixes the above issue by defining an array of the
'struct flow's on the function stack.

Found by running ovs on RHEL7.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-10 14:41:10 -07:00
+								    struct flow flows[UPCALL_MAX_BATCH];
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    size_t n_upcalls, i;
 								    n_upcalls = 0;
 								    while (n_upcalls < UPCALL_MAX_BATCH) {
 								        struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that uses a variable defined on
the stack inside the while loop for reading dpif upcalls and keeps
references to attributes of that variable within the same function
after the stack is cleared.  This bug can cause OVS to abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								        struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        struct upcall *upcall = &upcalls[n_upcalls];
-												ofproto-dpif-upcall: Fix out-of-scope use of stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that keeps a reference to a 'struct
flow' defined on the stack inside the while loop after it goes out
of scope.  This causes strange bugs such as wrong mask extraction
when that part of memory is corrupted, and could lead to even
more serious bugs or crashes.

This commit fixes the above issue by defining an array of the
'struct flow's on the function stack.

Found by running ovs on RHEL7.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-10 14:41:10 -07:00
+								        struct flow *flow = &flows[n_upcalls];
-												ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may send an upcall with hash info;
ovs-vswitchd should get it from the upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall doesn't
| include the related skb hash info. That introduces
| a problem, because the skb hash is important in the
| kernel stack. For example, the VXLAN module uses
| it to select the UDP src port. The tx queue selection
| may also use the hash in the stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-15 10:58:59 +08:00
+								        unsigned int mru = 0;
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								        char *errorp = NULL;
-												ofproto-dpif-upcall: Fix using uninitialized upcall hash.

upcalls are allocated on stack and 'hash' field must be initialized
regardless of attribute existence because it will be used later.

 Conditional jump or move depends on uninitialised value(s)
    at 0xFA74A7: dpif_netlink_encode_execute (dpif-netlink.c:1828)
    by 0xFA6DE8: dpif_netlink_operate__ (dpif-netlink.c:1906)
    by 0xFA612F: dpif_netlink_operate_chunks (dpif-netlink.c:2219)
    by 0xFA0E36: dpif_netlink_operate (dpif-netlink.c:2275)
    by 0xE5AFAC: dpif_operate (dpif.c:1376)
    by 0xDF3922: handle_upcalls (ofproto-dpif-upcall.c:1615)
    by 0xDF269B: recv_upcalls (ofproto-dpif-upcall.c:857)
    by 0xDF1C49: udpif_upcall_handler (ofproto-dpif-upcall.c:759)
    by 0xF3A3FE: ovsthread_wrapper (ovs-thread.c:383)
    by 0x565F6DA: start_thread (pthread_create.c:463)
    by 0x615988E: clone (clone.S:95)
  Uninitialised value was created by a stack allocation
    at 0xDF2258: recv_upcalls (ofproto-dpif-upcall.c:773)

Fixes: 0442bfb11d6c ("ofproto-dpif-upcall: Echo HASH attribute back to datapath.")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-01-04 01:07:36 +01:00
+								        uint64_t hash = 0;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        int error;
-												ofproto-dpif-upcall: Properly initialize 'recv_buf'.

Due to a typo, the latest upcall refactoring caused dpif_recv() to be
called on an un-initialized chunk of memory.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Reported-by: Justin Pettit <jpettit@nicira.com>
Acked-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2014-08-14 15:48:00 -07:00
+								        ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								                        sizeof recv_stubs[n_upcalls]);
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that uses a variable defined on
the stack inside the while loop for reading dpif upcalls and keeps
references to attributes of that variable within the same function
after the stack is cleared.  This bug can cause OVS to abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								        if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            ofpbuf_uninit(recv_buf);
 								            break;
 								        }
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								        upcall->fitness = odp_flow_key_to_flow(dupcall->key, dupcall->key_len,
-												odp-util: Improve log messages and error reporting for Netlink parsing.

As a side effect, this also reduces a lot of log messages' severities from
ERR to WARN.  They just didn't seem like messages that in general reported
anything that would prevent functioning.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-12-14 18:16:55 -08:00
+								                                               flow, NULL);
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								        if (upcall->fitness == ODP_FIT_ERROR) {
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            goto free_dupcall;
 								        }
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								        if (dupcall->mru) {
 								            mru = nl_attr_get_u16(dupcall->mru);
-												ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may send an upcall with hash info;
ovs-vswitchd should get it from the upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall doesn't
| include the related skb hash info. That introduces
| a problem, because the skb hash is important in the
| kernel stack. For example, the VXLAN module uses
| it to select the UDP src port. The tx queue selection
| may also use the hash in the stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-15 10:58:59 +08:00
+								        }
 								        if (dupcall->hash) {
-												ofproto-dpif-upcall: Fix using uninitialized upcall hash.

upcalls are allocated on stack and 'hash' field must be initialized
regardless of attribute existence because it will be used later.

 Conditional jump or move depends on uninitialised value(s)
    at 0xFA74A7: dpif_netlink_encode_execute (dpif-netlink.c:1828)
    by 0xFA6DE8: dpif_netlink_operate__ (dpif-netlink.c:1906)
    by 0xFA612F: dpif_netlink_operate_chunks (dpif-netlink.c:2219)
    by 0xFA0E36: dpif_netlink_operate (dpif-netlink.c:2275)
    by 0xE5AFAC: dpif_operate (dpif.c:1376)
    by 0xDF3922: handle_upcalls (ofproto-dpif-upcall.c:1615)
    by 0xDF269B: recv_upcalls (ofproto-dpif-upcall.c:857)
    by 0xDF1C49: udpif_upcall_handler (ofproto-dpif-upcall.c:759)
    by 0xF3A3FE: ovsthread_wrapper (ovs-thread.c:383)
    by 0x565F6DA: start_thread (pthread_create.c:463)
    by 0x615988E: clone (clone.S:95)
  Uninitialised value was created by a stack allocation
    at 0xDF2258: recv_upcalls (ofproto-dpif-upcall.c:773)

Fixes: 0442bfb11d6c ("ofproto-dpif-upcall: Echo HASH attribute back to datapath.")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-01-04 01:07:36 +01:00
+								            hash = nl_attr_get_u64(dupcall->hash);
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								        }
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that uses a variable defined on
the stack inside the while loop for reading dpif upcalls and keeps
references to attributes of that variable within the same function
after the stack is cleared.  This bug can cause OVS to abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								        error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								                               dupcall->type, dupcall->userdata, flow, mru,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                               &dupcall->ufid, PMD_ID_NULL, &errorp);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        if (error) {
 								            if (error == ENODEV) {
 								                /* Received packet on datapath port for which we couldn't
 								                 * associate an ofproto.  This can happen if a port is removed
 								                 * while traffic is being received.  Print a rate-limited
 								                 * message in case it happens frequently. */
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced a bug that uses a variable defined on
the stack inside the while loop for reading dpif upcalls and keeps
references to attributes of that variable within the same function
after the stack is cleared.  This bug can cause OVS to abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								                              dupcall->key_len, NULL, 0, NULL, 0,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (except for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								                              &dupcall->ufid, PMD_ID_NULL, NULL);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                             "port %"PRIu32"%s%s%s", flow->in_port.odp_port,
 								                             errorp ? " (" : "", errorp ? errorp : "",
 								                             errorp ? ")" : "");
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            }
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								            free(errorp);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            goto free_dupcall;
 								        }
-												ofproto-dpif-upcall: Fix use of cleared stack memory.

Commit cc377352d (ofproto: Reorganize in preparation for direct
dpdk upcalls.) introduced the bug that uses variable defined on
the stack inside while loop for reading dpif upcalls and keeps
reference to attributes of the variable within the same function
after the stack is cleared.  This bug can cause ovs abort.

This commit fixes the above issue by defining an array of the
variable on the function stack.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-08-15 00:59:36 -07:00
+								        upcall->key = dupcall->key;
 								        upcall->key_len = dupcall->key_len;
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								        upcall->ufid = &dupcall->ufid;
-												ofproto-dpif-upcall: Fix using uninitialized upcall hash.

upcalls are allocated on stack and 'hash' field must be initialized
regardless of attribute existence because it will be used later.

 Conditional jump or move depends on uninitialised value(s)
    at 0xFA74A7: dpif_netlink_encode_execute (dpif-netlink.c:1828)
    by 0xFA6DE8: dpif_netlink_operate__ (dpif-netlink.c:1906)
    by 0xFA612F: dpif_netlink_operate_chunks (dpif-netlink.c:2219)
    by 0xFA0E36: dpif_netlink_operate (dpif-netlink.c:2275)
    by 0xE5AFAC: dpif_operate (dpif.c:1376)
    by 0xDF3922: handle_upcalls (ofproto-dpif-upcall.c:1615)
    by 0xDF269B: recv_upcalls (ofproto-dpif-upcall.c:857)
    by 0xDF1C49: udpif_upcall_handler (ofproto-dpif-upcall.c:759)
    by 0xF3A3FE: ovsthread_wrapper (ovs-thread.c:383)
    by 0x565F6DA: start_thread (pthread_create.c:463)
    by 0x615988E: clone (clone.S:95)
  Uninitialised value was created by a stack allocation
    at 0xDF2258: recv_upcalls (ofproto-dpif-upcall.c:773)

Fixes: 0442bfb11d6c ("ofproto-dpif-upcall: Echo HASH attribute back to datapath.")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-01-04 01:07:36 +01:00
+								        upcall->hash = hash;
-												dpif-netlink: Provide original upcall pid in 'execute' commands.

When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall.  With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet.  This allows us to send
packets from the same traffic flow through the same handler.

The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.

While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again.  And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.

However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread.  So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.

The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port.  In real setups the
number of recirculations can go up to 4 or 5, sometimes more.

There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet.  So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.

For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2.  Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets.  Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.

Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID.  This way the upcall triggered during the
execution will go to the same handler.  Packets will be enqueued to
the same socket and re-injected in the same order.  This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows to eliminate
races between multiple handlers.

The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.

Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-07-08 13:34:02 +02:00
+								        upcall->pid = dupcall->pid;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												Extend OVS IPFIX exporter to export tunnel headers

Extend IPFIX exporter to export tunnel headers when both input and output
of the port.
Add three other_config options in IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
sampling tunnel info, on which direction (input or output).
Insert sampling action before output action and the output tunnel port
is sent to datapath in the sampling action.
Make datapath collect output tunnel info and send it back to userspace
in upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
+								        upcall->out_tun_key = dupcall->out_tun_key;
-												ofproto-dpif-upcall: Indentation fix.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-07-22 15:49:51 -07:00
+								        upcall->actions = dupcall->actions;
-												Extend OVS IPFIX exporter to export tunnel headers

Extend IPFIX exporter to export tunnel headers when both input and output
of the port.
Add three other_config options in IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
sampling tunnel info, on which direction (input or output).
Insert sampling action before output action and the output tunnel port
is sent to datapath in the sampling action.
Make datapath collect output tunnel info and send it back to userspace
in upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								        pkt_metadata_from_flow(&dupcall->packet.md, flow);
 								        flow_extract(&dupcall->packet, flow);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								        error = process_upcall(udpif, upcall,
 								                               &upcall->odp_actions, &upcall->wc);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        if (error) {
 								            goto cleanup;
 								        }
 								        n_upcalls++;
 								        continue;
 								cleanup:
 								        upcall_uninit(upcall);
 								free_dupcall:
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								        dp_packet_uninit(&dupcall->packet);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        ofpbuf_uninit(recv_buf);
 								    }
 								    if (n_upcalls) {
 								        handle_upcalls(handler->udpif, upcalls, n_upcalls);
 								        for (i = 0; i < n_upcalls; i++) {
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								            dp_packet_uninit(&dupcalls[i].packet);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            ofpbuf_uninit(&recv_bufs[i]);
 								            upcall_uninit(&upcalls[i]);
 								        }
 								    }
 								    return n_upcalls;
 								}
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								/* Calls udpif_flow_rebalance() on 'udpif', but at most once per
 								 * OFFL_REBAL_INTVL_MSEC and only when netdev_any_oor() returns true.
 								 * 'udpif->offload_rebalance_time' is advanced only when a rebalance
 								 * is actually started. */
 								static void
 								udpif_run_flow_rebalance(struct udpif *udpif)
 								{
 								    long long int now = time_msec();
 								    /* Bail out while still inside the interval since the last
 								     * rebalance, or when no netdev is reported as OOR.  The time
 								     * check is evaluated first, so netdev_any_oor() is consulted
 								     * only after the interval has elapsed. */
 								    if (udpif->offload_rebalance_time + OFFL_REBAL_INTVL_MSEC > now
 								        || !netdev_any_oor()) {
 								        return;
 								    }
 								    VLOG_DBG("Offload rebalance: Found OOR netdevs");
 								    /* Restart the interval from the moment sampled above. */
 								    udpif->offload_rebalance_time = now;
 								    udpif_flow_rebalance(udpif);
 								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void *
 								udpif_revalidator(void *arg)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    /* Used by all revalidators. */
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    struct revalidator *revalidator = arg;
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    struct udpif *udpif = revalidator->udpif;
 								    bool leader = revalidator == &udpif->revalidators[0];
 								    /* Used only by the leader. */
 								    long long int start_time = 0;
 								    uint64_t last_reval_seq = 0;
 								    size_t n_flows = 0;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
-												ovs-thread: Make caller provide thread name when creating a thread.

Thread names are occasionally very useful for debugging, but from time to
time we've forgotten to set one.  This commit adds the new thread's name
as a parameter to the function to start a thread, to make that mistake
impossible.  This also simplifies code, since two function calls become
only one.

This makes a few other changes to the thread creation function:

    * Since it is no longer a direct wrapper around a pthread function,
      rename it to avoid giving that impression.

    * Remove 'pthread_attr_t *' param that every caller supplied as NULL.

    * Change 'pthread *' parameter into a return value, for convenience.

The system-stats code hadn't set a thread name, so this fixes that issue.

This patch is a prerequisite for making RCU report the name of a thread
that is blocking RCU synchronization, because the easiest way to do that is
for ovsrcu_quiesce_end() to record the current thread's name.
ovsrcu_quiesce_end() is called before the thread function is called, so it
won't get a name set within the thread function itself.  Setting the thread
name earlier, as in this patch, avoids the problem.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-25 17:46:21 -07:00
+								    revalidator->id = ovsthread_id_self();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    for (;;) {
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        if (leader) {
 								            uint64_t reval_seq;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								            recirc_run(); /* Recirculation cleanup. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            reval_seq = seq_read(udpif->reval_seq);
 								            last_reval_seq = reval_seq;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            n_flows = udpif_get_n_flows(udpif);
 								            udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows);
 								            udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2;
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								            /* Only the leader checks the pause latch to prevent a race where
 								             * some threads think it's false and proceed to block on
 								             * reval_barrier and others think it's true and block indefinitely
 								             * on the pause_barrier */
 								            udpif->pause = latch_is_set(&udpif->pause_latch);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            /* Only the leader checks the exit latch to prevent a race where
 								             * some threads think it's true and exit and others think it's
 								             * false and block indefinitely on the reval_barrier */
 								            udpif->reval_exit = latch_is_set(&udpif->exit_latch);
 								            start_time = time_msec();
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								            if (!udpif->reval_exit && !udpif->pause) {
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                bool terse_dump;
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								                terse_dump = udpif_use_ufid(udpif);
-												dpctl: Add an option to dump only certain kinds of flows.

Usage:
    # to dump all datapath flows (default):
    ovs-dpctl dump-flows

    # to dump only flows that are in the kernel datapath:
    ovs-dpctl dump-flows type=ovs

    # to dump only flows that are offloaded:
    ovs-dpctl dump-flows type=offloaded

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2017-06-13 18:03:49 +03:00
+								                udpif->dump = dpif_flow_dump_create(udpif->dpif, terse_dump,
 								                                                    NULL);
-												utilities: Add revalidator measurement script and needed USDT probes.

This patch adds a Python script that can be used to analyze the
revalidator runs by providing statistics (including some real time
graphs).

The USDT events can also be captured to a file and used for
later offline analysis.

The following blog explains the Open vSwitch revalidator
implementation and how this tool can help you understand what is
happening in your system.

https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-23 12:03:29 +01:00
+								                OVS_USDT_PROBE(udpif_revalidator, start_dump, udpif, n_flows);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								            }
 								        }
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								        /* Wait for the leader to reach this point. */
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator threads use pthread_barrier_* functions in their
main loop to synchronize with the leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since last call as poll interval
and issue the following warnings:

"Unreasonably long XXXXms poll interval".

To prevent this, this commit implements the barrier struct and operations
for OVS, which allow a thread to block on a barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								        ovs_barrier_block(&udpif->reval_barrier);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								        if (udpif->pause) {
 								            revalidator_pause(revalidator);
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								            if (!udpif->reval_exit) {
 								                /* The main thread resumed all validators, but the leader
 								                 * didn't start the dump, go to next iteration. */
 								                continue;
 								            }
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								        }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        if (udpif->reval_exit) {
 								            break;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        revalidate(revalidator);
 								        /* Wait for all flows to have been dumped before we garbage collect. */
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator threads use pthread_barrier_* functions in their
main loop to synchronize with the leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since last call as poll interval
and issue the following warnings:

"Unreasonably long XXXXms poll interval".

To prevent this, this commit implements the barrier struct and operations
for OVS, which allow a thread to block on a barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								        ovs_barrier_block(&udpif->reval_barrier);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        revalidator_sweep(revalidator);
 								        /* Wait for all revalidators to finish garbage collection. */
-												ovs-thread: Implement OVS specific barrier.

Non-leader revalidator threads use pthread_barrier_* functions in their
main loop to synchronize with the leader thread.  However, since those threads
only call poll_block() intermittently, the poll interval check in
poll_block() can wrongly take the time since last call as poll interval
and issue the following warnings:

"Unreasonably long XXXXms poll interval".

To prevent this, this commit implements the barrier struct and operations
for OVS, which allow a thread to block on a barrier via poll_block().

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>


											
										
										
											2014-05-29 15:37:37 -07:00
+								        ovs_barrier_block(&udpif->reval_barrier);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
 								        if (leader) {
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								            unsigned int flow_limit;
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            long long int duration;
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								            atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								            dpif_flow_dump_destroy(udpif->dump);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            seq_change(udpif->dump_seq);
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								            if (netdev_is_offload_rebalance_policy_enabled()) {
 								                udpif_run_flow_rebalance(udpif);
 								            }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
 								            duration = MAX(time_msec() - start_time, 1);
 								            udpif->dump_duration = duration;
 								            if (duration > 2000) {
 								                flow_limit /= duration / 1000;
-												ofproto-dpif-upcall: Add flow_limit coverage counters.

Add new coverage counters that might help debugging flow_limit
related issues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:56 +01:00
+								                COVERAGE_INC(upcall_flow_limit_scaled);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            } else if (duration > 1300) {
 								                flow_limit = flow_limit * 3 / 4;
-												ofproto-dpif-upcall: Add flow_limit coverage counters.

Add new coverage counters that might help debugging flow_limit
related issues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:56 +01:00
+								                COVERAGE_INC(upcall_flow_limit_reduced);
-												ofproto-dpif-upcall: Fix for flow limit issue in revalidator

When the revalidator thread takes a long time to dump data path
flows (e.g. due to busy CPU), it reduces the maximum limit for
new flows that can be added. This results in more upcalls for
packets which do not find data path flows and temporarily reduces
overall throughput. When the situation improves and the revalidator
gets enough CPU cycles, it should increase the flow limit allowing
more flows to get inserted.

Currently the flow limit does not increase if the existing number of
flows is less than 2000 and does not allow any new flows due to
incorrect condition check. This results in a permanent drop in
performance in OVS with no automatic recovery.

This patch fixes the conditional check for increasing flow limit.

Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-08-03 06:04:23 +05:30
+								            } else if (duration < 1000 &&
 								                       flow_limit < n_flows * 1000 / duration) {
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								                flow_limit += 1000;
-												ofproto-dpif-upcall: Add flow_limit coverage counters.

Add new coverage counters that might help debugging flow_limit
related issues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:56 +01:00
+								                COVERAGE_INC(upcall_flow_limit_grew);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            }
 								            flow_limit = MIN(ofproto_flow_limit, MAX(flow_limit, 1000));
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								            atomic_store_relaxed(&udpif->flow_limit, flow_limit);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            if (duration > 2000) {
-												ofproto-dpif-upcall: Change flow dump duration message to WARN level.

Currently the 'Spent an unreasonably long Xms dumping flows' message
is set to the INFO level. However, based on this, we are also
drastically limiting the number of flows in the datapath, and this
would warrant a WARNING level.

Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-01-10 12:25:55 +01:00
+								                VLOG_WARN("Spent an unreasonably long %lldms dumping flows",
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								                          duration);
 								            }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												utilities: Add revalidator measurement script and needed USDT probes.

This patch adds a Python script that can be used to analyze the
revalidator runs by providing statistics (including some real time
graphs).

The USDT events can also be captured to a file and used for
later offline analysis.

The following blog explains the Open vSwitch revalidator
implementation and how this tool can help you understand what is
happening in your system.

https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-23 12:03:29 +01:00
+								            OVS_USDT_PROBE(udpif_revalidator, sweep_done, udpif, n_flows,
 								                           MIN(ofproto_max_idle, ofproto_max_revalidator));
-												upcall: Configure datapath max-revalidator through ovs-vsctl.

This patch adds a new configuration option, "max-revalidator" to the
Open_vSwitch "other-config" column. This sets the maximum allowed revalidator
timeout. Actual timeout value is determined at runtime as minimum of
"max-idle" and "max-revalidator".

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-07-21 11:34:21 +03:00
+								            poll_timer_wait_until(start_time + MIN(ofproto_max_idle,
 								                                                   ofproto_max_revalidator));
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            seq_wait(udpif->reval_seq, last_reval_seq);
 								            latch_wait(&udpif->exit_latch);
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to pause
all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								            latch_wait(&udpif->pause_latch);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								            poll_block();
-												upcall: Don't start new revalidation round too soon after the last one.

The execution time of 'ovs-ofctl add-flows' with a large number of
flows can be more than halved if revalidators are not running after
each flow mod separately.  This was first suspected when it was found
that 'ovs-ofctl --bundle add-flows' is about 10 times faster than the
same command without the '--bundle' option in a scenario where there
is a large set of flows being added and no datapath flows at all.  One
of the differences caused by the '--bundle' option is that the
revalidators are woken up only once, at the end of the whole set of
flow table changes, rather than after each flow table change
individually.

This patch limits the revalidation to run at most 200 times a second
by enforcing a minimum of 5ms time gap between the start times of
revalidation rounds.  If nothing happens in, say 6 milliseconds, and
then a new flow table change is signaled, the revalidator threads wake
up immediately without any further delay.  Values smaller than 5 were
found to increase the 'ovs-ofctl add-flows' execution time noticeably.

Since the revalidators are not running after each flow mod, the
overall OVS CPU utilization during the 'ovs-ofctl add-flows' run time
is reduced roughly by one core on a four core machine.

In testing the 'ovs-ofctl add-flows' execution time is not
significantly improved from this even if the revalidators are not
notified about the flow table changes at all.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-09-27 12:55:46 -07:00
 								            if (!latch_is_set(&udpif->pause_latch) &&
 								                !latch_is_set(&udpif->exit_latch)) {
 								                long long int now = time_msec();
 								                /* Block again if we are woken up within 5ms of the last start
 								                 * time. */
 								                start_time += 5;
 								                if (now < start_time) {
 								                    poll_timer_wait_until(start_time);
 								                    latch_wait(&udpif->exit_latch);
 								                    latch_wait(&udpif->pause_latch);
 								                    poll_block();
 								                }
 								            }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
 								    }
 								    return NULL;
 								}
-												dpif: Restore a few lines with form feed characters

A few lines with form feed characters (ASCII: ^L) were accidentally
deleted by a recent commit to support rebalancing of offloaded flows.
This patch reverts those lines.

Fixes: 57924fc91c ("revalidator: Rebalance offloaded flows")
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-01 00:51:41 +05:30
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								static enum upcall_type
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
 								                struct user_action_cookie *cookie)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
 								    /* First look at the upcall type. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    switch (type) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    case DPIF_UC_ACTION:
 								        break;
 								    case DPIF_UC_MISS:
 								        return MISS_UPCALL;
 								    case DPIF_N_UC_TYPES:
 								    default:
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, type);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        return BAD_UPCALL;
 								    }
 								    /* "action" upcalls need a closer look. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    if (!userdata) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        VLOG_WARN_RL(&rl, "action upcall missing cookie");
 								        return BAD_UPCALL;
 								    }
-												ofproto-dpif: Use a fixed size userspace cookie.

This simplifies the cookie handling a bit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-04 12:37:57 -08:00
 								    size_t userdata_len = nl_attr_get_size(userdata);
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    if (userdata_len != sizeof *cookie) {
-												Avoid printf type modifiers not supported by MSVC C runtime library.

The MSVC C library printf() implementation does not support the 'z', 't',
'j', or 'hh' format specifiers.  This commit changes the Open vSwitch code
to avoid those format specifiers, switching to standard macros from
<inttypes.h> where available and inventing new macros resembling them
where necessary.  It also updates CodingStyle to specify the macros' use
and adds a Makefile rule to report violations.

Signed-off-by: Alin Serdean <aserdean@cloudbasesolutions.com>
Co-authored-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-25 23:38:48 -08:00
+								        VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								                     userdata_len);
 								        return BAD_UPCALL;
 								    }
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    memcpy(cookie, nl_attr_get(userdata), sizeof *cookie);
 								    if (cookie->type == USER_ACTION_COOKIE_SFLOW) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        return SFLOW_UPCALL;
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    } else if (cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
 								        return SLOW_PATH_UPCALL;
 								    } else if (cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        return FLOW_SAMPLE_UPCALL;
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    } else if (cookie->type == USER_ACTION_COOKIE_IPFIX) {
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        return IPFIX_UPCALL;
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								    } else if (cookie->type == USER_ACTION_COOKIE_CONTROLLER) {
 								        return CONTROLLER_UPCALL;
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    } else {
 								        VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								                     " and size %"PRIuSIZE, cookie->type, userdata_len);
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        return BAD_UPCALL;
 								    }
 								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								/* Calculates slow path actions for 'xout'.  'buf' must statically be
 								 * initialized with at least 128 bytes of space. */
 								static void
 								compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								                  odp_port_t odp_in_port, ofp_port_t ofp_in_port,
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								                  struct ofpbuf *buf, uint32_t meter_id,
 								                  struct uuid *ofproto_uuid)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
-												ofproto-dpif: Use a fixed size userspace cookie.

This simplifies the cookie handling a bit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-04 12:37:57 -08:00
+								    struct user_action_cookie cookie;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    odp_port_t port;
 								    uint32_t pid;
-												ofproto-dpif: Fix using uninitialised memory in user_action_cookie.

Designated initializers are not suitable for initializing non-packed
structures and unions that are subject to comparison by memcmp(),
because padding bytes may be left uninitialized.

The whole memory of 'struct user_action_cookie' must be explicitly cleared
before use because it will be copied with memcpy and later compared
by memcmp in ofpbuf_equal().

A few issues found by valgrind:

 Thread 13 revalidator11:
 Conditional jump or move depends on uninitialised value(s)
    at 0x4C35D96: __memcmp_sse4_1 (in vgpreload_memcheck.so)
    by 0x9D4404: ofpbuf_equal (ofpbuf.h:273)
    by 0x9D4404: revalidate_ukey__ (ofproto-dpif-upcall.c:2219)
    by 0x9D4404: revalidate_ukey (ofproto-dpif-upcall.c:2286)
    by 0x9D62AC: revalidate (ofproto-dpif-upcall.c:2685)
    by 0x9D62AC: udpif_revalidator (ofproto-dpif-upcall.c:942)
    by 0xA9C732: ovsthread_wrapper (ovs-thread.c:383)
    by 0x5FF86DA: start_thread (pthread_create.c:463)
    by 0x6AF488E: clone (clone.S:95)
  Uninitialised value was created by a stack allocation
    at 0x9D4450: compose_slow_path (ofproto-dpif-upcall.c:1062)

 Thread 11 revalidator16:
 Conditional jump or move depends on uninitialised value(s)
    at 0x4C35D96: __memcmp_sse4_1 (in vgpreload_memcheck.so)
    by 0x9D4404: ofpbuf_equal (ofpbuf.h:273)
    by 0x9D4404: revalidate_ukey__ (ofproto-dpif-upcall.c:2220)
    by 0x9D4404: revalidate_ukey (ofproto-dpif-upcall.c:2287)
    by 0x9D62BC: revalidate (ofproto-dpif-upcall.c:2686)
    by 0x9D62BC: udpif_revalidator (ofproto-dpif-upcall.c:942)
    by 0xA9C6D2: ovsthread_wrapper (ovs-thread.c:383)
    by 0x5FF86DA: start_thread (pthread_create.c:463)
    by 0x6AF488E: clone (clone.S:95)
  Uninitialised value was created by a stack allocation
    at 0x9DC4E0: compose_sflow_action (ofproto-dpif-xlate.c:3211)

The struct was never marked as 'packed'; however, it was manually
adjusted to be so in practice.
An old IPFIX-related commit first made the structure non-contiguous.
Commit 8de6ff3ea864 ("ofproto-dpif: Use a fixed size userspace cookie.")
added uninitialized parts of the additional union space, and the next
one introduced new holes between structure fields for all cases.

CC: Justin Pettit <jpettit@ovn.org>
Fixes: 8b7ea2d48033 ("Extend OVS IPFIX exporter to export tunnel headers")
Fixes: 8de6ff3ea864 ("ofproto-dpif: Use a fixed size userspace cookie.")
Fixes: fcb9579be3c7 ("ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.")
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-07-25 18:11:13 +03:00
+								    memset(&cookie, 0, sizeof cookie);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								    cookie.ofp_in_port = ofp_in_port;
 								    cookie.ofproto_uuid = *ofproto_uuid;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    cookie.slow_path.reason = xout->slow;
 								    port = xout->slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)
 								        ? ODPP_NONE
 								        : odp_in_port;
-												dpif: Remove support for multiple queues per port.

Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
sockets") removed dpif-netlink support for multiple queues per port.
No remaining dpif provider supports multiple queues per port, so
remove infrastructure for the feature.

CC: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-09-25 15:14:13 -07:00
+								    pid = dpif_port_get_pid(udpif->dpif, port);
-												ofproto: Meter slowpath action when action upcall meters are configured

If a slow path action is a controller action, meter it when the
controller meter is configured.  For other kinds of slow path actions,
meter it when the slowpath meter is configured.

Note, this patch only considers the meter configuration of the
packet's input bridge, which may not be the same bridge on which the
action is generated.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-04-11 16:10:41 -07:00
 								    size_t offset;
 								    size_t ac_offset;
 								    if (meter_id != UINT32_MAX) {
 								        /* If slowpath meter is configured, generate clone(meter, userspace)
 								         * action. */
 								        offset = nl_msg_start_nested(buf, OVS_ACTION_ATTR_SAMPLE);
 								        nl_msg_put_u32(buf, OVS_SAMPLE_ATTR_PROBABILITY, UINT32_MAX);
 								        ac_offset = nl_msg_start_nested(buf, OVS_SAMPLE_ATTR_ACTIONS);
 								        nl_msg_put_u32(buf, OVS_ACTION_ATTR_METER, meter_id);
 								    }
-												ofproto-dpif: Use a fixed size userspace cookie.

This simplifies the cookie handling a bit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-04 12:37:57 -08:00
+								    odp_put_userspace_action(pid, &cookie, sizeof cookie,
-												odp-util: Fix netlink message overflow with userdata.

Userdata that is too large could overflow the netlink message, leading to
out-of-bounds memory accesses or an assertion failure while formatting
nested actions.

Fix that by checking the size and returning the correct error code.

Credit to OSS-Fuzz.

Reported-at: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=27640
Fixes: e995e3df57ea ("Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Flavio Leitner <fbl@sysclose.org>

											
										
										
											2020-12-21 16:01:04 +01:00
+								                             ODPP_NONE, false, buf, NULL);
-												ofproto: Meter slowpath action when action upcall meters are configured

If a slow path action is a controller action, meter it when the
controller meter is configured.  For other kinds of slow path actions,
meter it when the slowpath meter is configured.

Note, this patch only considers the meter configuration of the
packet's input bridge, which may not be the same bridge on which the
action is generated.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-04-11 16:10:41 -07:00
 								    if (meter_id != UINT32_MAX) {
 								        nl_msg_end_nested(buf, ac_offset);
 								        nl_msg_end_nested(buf, offset);
 								    }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												ofproto-dpif-upcall: Fix a free of uninitialized memory.

On current master, when 'upcall_receive()' returns an error, the
ofpbuf 'upcall->put_actions' is left uninitialized.  In some use cases,
the failure of 'upcall_receive()' leads the caller to uninitialize
'upcall->put_actions' anyway, freeing an uninitialized pointer.

This commit fixes the issue by making the caller skip uninitializing
the 'upcall' when there is an error.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-08 10:41:36 -07:00
+								/* If there is no error, the upcall must be destroyed with upcall_uninit()
 								 * before quiescing, as the referred objects are guaranteed to exist only
 								 * until the calling thread quiesces.  Otherwise, do not call upcall_uninit()
 								 * since the 'upcall->put_actions' remains uninitialized. */
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								static int
 								upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers.  That complicates ofpbuf.  By making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use cases.
This avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								               const struct dp_packet *packet, enum dpif_upcall_type type,
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								               const struct nlattr *userdata, const struct flow *flow,
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								               const unsigned int mru,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								               const ovs_u128 *ufid, const unsigned pmd_id,
 								               char **errorp)
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								{
 								    int error;
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
 								    if (upcall->type == BAD_UPCALL) {
 								        return EAGAIN;
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								    } else if (upcall->type == MISS_UPCALL) {
 								        error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                             &upcall->sflow, NULL, &upcall->ofp_in_port,
 								                             errorp);
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								        if (error) {
 								            return error;
 								        }
 								    } else {
 								        struct ofproto_dpif *ofproto
 								            = ofproto_dpif_lookup_by_uuid(&upcall->cookie.ofproto_uuid);
 								        if (!ofproto) {
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available,
such as whether the recirculation id wasn't found or the packet came
from an unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								            if (errorp) {
 								                *errorp = xstrdup("upcall could not find ofproto");
 								            } else {
 								                VLOG_INFO_RL(&rl, "upcall could not find ofproto");
 								            }
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								            return ENODEV;
 								        }
 								        upcall->ofproto = ofproto;
 								        upcall->ipfix = ofproto->ipfix;
 								        upcall->sflow = ofproto->sflow;
 								        upcall->ofp_in_port = upcall->cookie.ofp_in_port;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    }
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    upcall->recirc = NULL;
 								    upcall->have_recirc_ref = false;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    upcall->flow = flow;
 								    upcall->packet = packet;
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    upcall->ufid = ufid;
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As a direct benefit, datapath
and flow statistics no longer need to be protected by a mutex
or be declared as per-thread variables, since they are only
written by the owning pmd thread.

As a side effect, the flow-dump output of the userspace datapath
can contain overlapping flows.  To reduce confusion, the dumps
from different pmd threads will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be applied to all
pmd threads receiving from that interface (except for flow-get,
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								    upcall->pmd_id = pmd_id;
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								    ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
 								                    sizeof upcall->odp_actions_stub);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    ofpbuf_init(&upcall->put_actions, 0);
 								    upcall->xout_initialized = false;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    upcall->ukey_persists = false;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    upcall->ukey = NULL;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    upcall->key = NULL;
 								    upcall->key_len = 0;
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								    upcall->mru = mru;
-												dpif-netlink: Provide original upcall pid in 'execute' commands.

When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall.  With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet.  This allows us to send
packets from the same traffic flow through the same handler.

The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.

While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again.  And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.

However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread.  So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.

The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port.  In real setups the
number of recirculations can go up to 4 or 5, sometimes more.

There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet.  So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.

For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2.  Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets.  Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.

Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID.  This way the upcall triggered during the
execution will go to the same handler.  Packets will be enqueued to
the same socket and re-injected in the same order.  This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows us to eliminate
races between multiple handlers.

The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.

Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-07-08 13:34:02 +02:00
+								    upcall->pid = 0;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												Extend OVS IPFIX exporter to export tunnel headers

Extend the IPFIX exporter to export tunnel headers on both input and
output of the port.
Add three other_config options in the IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
tunnel info is sampled and in which direction (input or output).
Insert a sampling action before the output action; the output tunnel port
is sent to the datapath in the sampling action.
Make the datapath collect output tunnel info and send it back to userspace
in the upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
+								    upcall->out_tun_key = NULL;
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted, but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								    upcall->actions = NULL;
-												Extend OVS IPFIX exporter to export tunnel headers

Extend the IPFIX exporter to export tunnel headers on both input and
output of the port.
Add three other_config options in the IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
tunnel info is sampled and in which direction (input or output).
Insert a sampling action before the output action; the output tunnel port
is sent to the datapath in the sampling action.
Make the datapath collect output tunnel info and send it back to userspace
in the upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    return 0;
 								}
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								static void
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								upcall_xlate(struct udpif *udpif, struct upcall *upcall,
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								             struct ofpbuf *odp_actions, struct flow_wildcards *wc)
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								{
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    struct dpif_flow_stats stats;
-												xlate: auto ofproto trace when recursion too deep

Usually ofproto/trace is used to debug flow translation errors.
When a translation error occurs, such as recursion too deep or too many
resubmits, the issue might be momentary; the flows causing the recursion
have expired by the time users try to debug it.  This patch enables the
ofproto trace automatically when recursion is too deep or there are too
many resubmits, by invoking the translation again and logging the ofproto
trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-28 16:32:27 -08:00
+								    enum xlate_error xerr;
-												upcall: Remove redundant xlate_actions_for_side_effects().

As a result of commit a0bab87 (ofproto: Remove per-flow miss hash
table from upcall handler.) we're guaranteed that every packet has had
xlate_actions() called on it at least once.  Therefore, there's no
need to re-xlate slow path flows just to shove their packets through
the system.

This also may fix a bug discussed here:
http://openvswitch.org/pipermail/discuss/2014-April/013670.html

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Reported-by: Murphy McCauley <murphy.mccauley@gmail.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-22 10:53:27 -07:00
+								    struct xlate_in xin;
-												xlate: auto ofproto trace when recursion too deep

Usually ofproto/trace is used to debug flow translation errors.
When a translation error occurs, such as recursion too deep or too many
resubmits, the issue might be momentary; the flows causing the recursion
have expired by the time users try to debug it.  This patch enables the
ofproto trace automatically when recursion is too deep or there are too
many resubmits, by invoking the translation again and logging the ofproto
trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-28 16:32:27 -08:00
+								    struct ds output;
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    stats.n_packets = 1;
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    stats.n_bytes = dp_packet_size(upcall->packet);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    stats.used = time_msec();
 								    stats.tcp_flags = ntohs(upcall->flow->tcp_flags);
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												ofproto: Refactor packet_out handling.

Refactor handle_packet_out() to prepare for bundle support for packet
outs in a later patch.

Two new callbacks are introduced in ofproto-provider class:
->packet_xlate() and ->packet_execute().  ->packet_xlate() translates
the packet using the flow and actions provided by the caller, but
defers all OpenFlow-visible side-effects (stats, learn actions, actual
packet output, etc.) to be explicitly executed with the
->packet_execute() call.

Adds a new ofproto_rule_reduce_timeouts__() that must be called with
'ofproto_mutex' held.  This is used in the next patch.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-09-14 16:51:27 -07:00
+								    xlate_in_init(&xin, upcall->ofproto,
 								                  ofproto_dpif_get_tables_version(upcall->ofproto),
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								                  upcall->flow, upcall->ofp_in_port, NULL,
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								                  stats.tcp_flags, upcall->packet, wc, odp_actions);
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    if (upcall->type == MISS_UPCALL) {
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        xin.resubmit_stats = &stats;
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
-												ofproto-dpif-xlate: Distinguish "freezing" from "recirculation".

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-02-16 10:51:58 -08:00
+								        if (xin.frozen_state) {
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								            /* We may install a datapath flow only if we get a reference to the
 								             * recirculation context (otherwise we could have recirculation
 								             * upcalls using recirculation ID for which no context can be
 								             * found).  We may still execute the flow's actions even if we
 								             * don't install the flow. */
-												ofproto-dpif-xlate: Distinguish "freezing" from "recirculation".

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-02-16 10:51:58 -08:00
+								            upcall->recirc = recirc_id_node_from_state(xin.frozen_state);
-												ofproto-dpif-xlate: Put recirc_state, not recirc_id_node, in xlate_in.

This will make it possible, in an upcoming commit, to construct a
recirc_state locally on the stack to pass to xlate_actions().  It would
also be possible to construct and pass a recirc_id_node on the stack, but
the translation process only uses the recirc_state anyway.  The alternative
here of having upcall_xlate() know that it can recover the recirc_id_node
from the recirc_state isn't great either; it's debatable which is the
better approach.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-20 16:53:01 -08:00
+								            upcall->have_recirc_ref = recirc_id_node_try_ref_rcu(upcall->recirc);
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        }
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								    } else {
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        /* For non-miss upcalls, we are either executing actions (one of which
 								         * is an userspace action) for an upcall, in which case the stats have
 								         * already been taken care of, or there's a flow in the datapath which
 								         * this packet was accounted to.  Presumably the revalidators will deal
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								         * with pushing its stats eventually. */
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    }
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    upcall->reval_seq = seq_read(udpif->reval_seq);
-												ofproto-dpif-upcall: Translate input port as part of upcall translation.

When we generate wildcards for upcalled flows, the flows and therefore
the wildcards, are in OpenFlow format. These are mostly the same but
one exception is the input port. We work around this problem by simply
performing an exact match on the input port when generating netlink
formatted keys. (This does not lose any information in practice because
action translation also always exact matches on input port.)

While this works fine for kernel based flows, it misses the userspace
datapath, which directly consumes the OFP format mask for the input
port. The effect of this is that the in_port mask is sometimes only
the lower 16 bits of the field. (This is because OFP format is a 16-bit
value stored in a 32-bit field. The full width of the field is initialized
with an exact match mask but certain operations result in cleaving this
down to 16 bits.) In practice this does not cause a problem because datapath
port numbers are almost always in the lower 16 bits of the range anyways.

This moves the masking of the datapath format field to translation so that
all datapaths see the same result. This also makes more sense conceptually
as the input port in the flow is also in ODP format at this stage.

Signed-off-by: Jesse Gross <jesse@kernel.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-06-09 13:18:45 -07:00
-												xlate: auto ofproto trace when recursion too deep

Usually ofproto/trace is used to debug the flow translation error.
When a translation error such as recursion too deep or too many resubmits
occurs, the issue might be momentary; flows causing the recursion expire
when users try to debug it.  This patch enables the ofproto trace
automatically when recursion is too deep or too many resubmit, by
invoking the translation again, and log the ofproto trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-28 16:32:27 -08:00
+								    xerr = xlate_actions(&xin, &upcall->xout);
 								    /* Translate again and log the ofproto trace for
 								     * these two error types. */
 								    if (xerr == XLATE_RECURSION_TOO_DEEP ||
 								        xerr == XLATE_TOO_MANY_RESUBMITS) {
 								        static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 1);
 								        /* This is a huge log, so be conservative. */
 								        if (!VLOG_DROP_WARN(&rll)) {
 								            ds_init(&output);
 								            ofproto_trace(upcall->ofproto, upcall->flow,
-												ofproto-dpif-trace: add --name option for ofproto/trace.

Most of commands in ovs-ofctl and ovs-appctl can display port names
instead of port numbers by using --names option. This change adds
similar functionality to ofproto/trace.

For backward compatibility, the default behavior is the same as
before.

Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-08-26 16:48:53 +09:00
+								                          upcall->packet, NULL, 0, NULL, &output,
 								                          false);
-												xlate: auto ofproto trace when recursion too deep

Usually ofproto/trace is used to debug the flow translation error.
When a translation error such as recursion too deep or too many resubmits
occurs, the issue might be momentary; flows causing the recursion expire
when users try to debug it.  This patch enables the ofproto trace
automatically when recursion is too deep or too many resubmit, by
invoking the translation again, and log the ofproto trace as warnings.
Since the log will be huge, rate limit to one per minute.

VMWare-BZ: #2054659
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-28 16:32:27 -08:00
+								            VLOG_WARN("%s", ds_cstr(&output));
 								            ds_destroy(&output);
 								        }
 								    }
-												ofproto-dpif-upcall: Translate input port as part of upcall translation.

When we generate wildcards for upcalled flows, the flows and therefore
the wildcards, are in OpenFlow format. These are mostly the same but
one exception is the input port. We work around this problem by simply
performing an exact match on the input port when generating netlink
formatted keys. (This does not lose any information in practice because
action translation also always exact matches on input port.)

While this works fine for kernel based flows, it misses the userspace
datapath, which directly consumes the OFP format mask for the input
port. The effect of this is that the in_port mask is sometimes only
the lower 16 bits of the field. (This is because OFP format is a 16-bit
value stored in a 32-bit field. The full width of the field is initialized
with an exact match mask but certain operations result in cleaving this
down to 16 bits.) In practice this does not cause a problem because datapath
port numbers are almost always in the lower 16 bits of the range anyways.

This moves the masking of the datapath format field to translation so that
all datapaths see the same result. This also makes more sense conceptually
as the input port in the flow is also in ODP format at this stage.

Signed-off-by: Jesse Gross <jesse@kernel.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-06-09 13:18:45 -07:00
+								    if (wc) {
 								        /* Convert the input port wildcard from OFP to ODP format. There's no
 								         * real way to do this for arbitrary bitmasks since the numbering spaces
 								         * aren't the same. However, flow translation always exact matches the
 								         * whole thing, so we can do the same here. */
 								        WC_MASK_FIELD(wc, in_port.odp_port);
 								    }
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    upcall->xout_initialized = true;
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost for a long time.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								    if (upcall->fitness == ODP_FIT_TOO_LITTLE) {
 								        upcall->xout.slow |= SLOW_MATCH;
 								    }
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    if (!upcall->xout.slow) {
 								        ofpbuf_use_const(&upcall->put_actions,
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								                         odp_actions->data, odp_actions->size);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    } else {
-												ofproto: Allow xlate_actions() to fail.

Sometimes xlate_actions() fails due to too deep recursion, too many
MPLS labels, or missing recirculation context.  Make xlate_actions()
clear out the produced odp actions in these cases to make it easy for
the caller to install a drop flow (instead of installing a flow with
partially translated actions).  Also, return a specific error code, so
that the error can be properly propagated where meaningful.

There are many cases in which the NORMAL action decides to drop the
packet.  Most of these are not, however, translation errors, but just
reactions to malformed input. In these cases it is correct to make the
NORMAL action do nothing, but allow other actions in the pipeline (if
any) to take effect.

Before this patch it was possible that the revalidation installed a
flow with a recirculation ID with an invalid recirc ID (== 0), due to
the introduction of in-place modification in commit 43b2f131a229
(ofproto: Allow in-place modifications of datapath flows).

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								        /* upcall->put_actions already initialized by upcall_receive(). */
-												dpif: Remove support for multiple queues per port.

Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
sockets") removed dpif-netlink support for multiple queues per port.
No remaining dpif provider supports multiple queues per port, so
remove infrastructure for the feature.

CC: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-09-25 15:14:13 -07:00
+								        compose_slow_path(udpif, &upcall->xout,
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								                          upcall->flow->in_port.odp_port, upcall->ofp_in_port,
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								                          &upcall->put_actions,
 								                          upcall->ofproto->up.slowpath_meter_id,
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								                          &upcall->ofproto->uuid);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    }
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
-												ofproto-dpif-upcall: Create ukey only for miss upcalls.

There is no point trying to create ukeys for non-miss upcalls, such as
upcalls due to explicit userspace actions (e.g., slow-pathed flows).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-02-24 15:33:59 -08:00
+								    /* This function is also called for slow-pathed flows.  As we are only
 								     * going to create new datapath flows for actual datapath misses, there is
 								     * no point in creating a ukey otherwise. */
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    if (upcall->type == MISS_UPCALL) {
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								        upcall->ukey = ukey_create_from_upcall(upcall, wc);
-												ofproto-dpif-upcall: Create ukey only for miss upcalls.

There is no point trying to create ukeys for non-miss upcalls, such as
upcalls due to explicit userspace actions (e.g., slow-pathed flows).

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-02-24 15:33:59 -08:00
+								    }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								}
-												ofproto-dpif-upcall: Fix sparse warnings.

Fixes these warnings from "sparse":

../ofproto/ofproto-dpif-upcall.c:761:1: warning: symbol 'free_upcall' was
    not declared. Should it be static?
../ofproto/ofproto-dpif-upcall.c:849:1: warning: symbol 'convert_upcall'
    was not declared. Should it be static?

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2014-07-26 12:19:03 -07:00
+								static void
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								upcall_uninit(struct upcall *upcall)
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								{
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    if (upcall) {
 								        if (upcall->xout_initialized) {
 								            xlate_out_uninit(&upcall->xout);
 								        }
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								        ofpbuf_uninit(&upcall->odp_actions);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        ofpbuf_uninit(&upcall->put_actions);
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        if (upcall->ukey) {
 								            if (!upcall->ukey_persists) {
 								                ukey_delete__(upcall->ukey);
 								            }
 								        } else if (upcall->have_recirc_ref) {
 								            /* The reference was transferred to the ukey if one was created. */
 								            recirc_id_node_unref(upcall->recirc);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        }
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    }
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								}
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								/* Decides whether a datapath flow should be installed for 'upcall'.
 								 *
 								 * Returns true only if all of the following hold:
 								 *
 								 *      - 'upcall' is a miss upcall (its type is MISS_UPCALL), AND
 								 *      - 'upcall' either has no recirculation context ('upcall->recirc'
 								 *        is null) or holds a reference on it
 								 *        ('upcall->have_recirc_ref'), AND
 								 *      - the count returned by udpif_get_n_flows() is below
 								 *        'udpif->flow_limit'.
 								 *
 								 * Otherwise returns false.  A missing recirc reference is logged at
 								 * debug level; reaching the flow limit increments the
 								 * 'upcall_flow_limit_hit' coverage counter and logs a warning. */
 								static bool
 								should_install_flow(struct udpif *udpif, struct upcall *upcall)
 								{
 								    unsigned int flow_limit;
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separate true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    if (upcall->type != MISS_UPCALL) {
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								        return false;
 								    } else if (upcall->recirc && !upcall->have_recirc_ref) {
-												upcall: reduce log level for "no reference to recirc flow" message

Reduce log level from "warn" to "debug" for "upcall: no reference to
recirc flow" log message.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Lance Richardson <lrichard@redhat.com>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
											
										
										
											2016-08-29 17:06:13 -04:00
+								        VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								        return false;
 								    }
 								    /* Read the limit once into a local so that the comparison and the
 								     * log message below use the same value. */
 								    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
 								    if (udpif_get_n_flows(udpif) >= flow_limit) {
-												ofproto: report coverage on hitting datapath flow limit

Whenever the number of flows in the datapath crosses above
the flow limit set/autoconfigured, it is helpful to report
this event through coverage counter for an operator/devops
engineer to know and take proactive corrections in the
switch configuration.

Today, these events are reported in ovs vswitch log when
a new flow can not be inserted in upcall processing in which
case ovs writes a warning, otherwise an auto correction
made by ovs to flush old flows without any intimation at all.

Signed-off-by: Gowrishankar Muthukrishnan <gmuthukr@redhat.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-20 19:13:42 +05:30
+								        COVERAGE_INC(upcall_flow_limit_hit);
-												ofproto-dpif-upcall: Log the value of flow limit.

The datapath flow limit is calculated by revalidators so
log the value as well.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-09-29 17:07:31 -03:00
+								        VLOG_WARN_RL(&rl,
 								                     "upcall: datapath reached the dynamic limit of %u flows.",
 								                     flow_limit);
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								        return false;
 								    }
 								    return true;
 								}
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								static int
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf; by making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								          unsigned pmd_id, enum dpif_upcall_type type,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (except for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								          const struct nlattr *userdata, struct ofpbuf *actions,
 								          struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								{
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    struct udpif *udpif = aux;
 								    struct upcall upcall;
 								    bool megaflow;
 								    int error;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_read_relaxed(&enable_megaflows, &megaflow);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    error = upcall_receive(&upcall, udpif->backer, packet, type, userdata,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available
as if the recirculation id wasn't found or it was a packet from
unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                           flow, 0, ufid, pmd_id, NULL);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    if (error) {
-												ofproto-dpif-upcall: Fix a free of uninitialized memory.

On current master, when 'upcall_receive()' returns error, the
ofpbuf 'upcall->put_actions' is uninitialized.  In some usecase,
the failure of 'upcall_receive()' will cause uninitialize of
'upcall->put_actions' and free of uninitialized pointer.

This commit fixes the issue by making the caller not conduct
the uninitialize of the 'upcall' when there is error.

Found by inspection.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-08 10:41:36 -07:00
+								        return error;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    }
-												ofproto-dpif-upcall: Fix using uninitialized fitness.

'upcall_xlate()' makes a decision to compose slow path actions
by checking the 'upcall->fitness', which is not initialized in
case of calling from the 'upcall_cb()'.

'upcall_cb()' receives the real flow, so the fitness should be
initialized as perfect.

Fixes following tests on travis:

    ofproto-dpif.at: ofproto-dpif megaflow - disabled - pmd
    ofproto-dpif.at: ofproto-dpif - conntrack - output action

CC: Ben Pfaff <blp@ovn.org>
Fixes: 687bafbb8a79 ("ofproto-dpif-upcall: Slow path flows that
                      datapath can't fully match.")
Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-26 11:10:11 +03:00
+								    upcall.fitness = ODP_FIT_PERFECT;
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								    error = process_upcall(udpif, &upcall, actions, wc);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    if (error) {
 								        goto out;
 								    }
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    if (upcall.xout.slow && put_actions) {
-												ofpbuf: Simplify ofpbuf API.

ofpbuf was complicated due to its wide usage across all
layers of OVS. Now that we have introduced an independent dp_packet
which can be used for datapath packets, we can simplify ofpbuf.
The following patch removes DPDK mbuf and access API of ofpbuf
members.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-03-02 17:29:44 -08:00
+								        ofpbuf_put(put_actions, upcall.put_actions.data,
 								                   upcall.put_actions.size);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    }
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												ovs-vswitchd: Avoid segfault for "netdev" datapath.

When the datapath, whose type is "netdev", processes packets
in userspace action, it may cause a segmentation fault. In the
dp_execute_userspace_action(), we pass the "wc" argument to
dp_netdev_upcall() using NULL. In the dp_netdev_upcall() call tree,
the "wc" will be used. For example, dp_netdev_upcall() uses the
&wc->masks for debugging, and flow_wildcards_init_for_packet()
uses the  "wc" if we disable megaflow, which is described in
more detail below.

Segmentation fault in flow_wildcards_init_for_packet:

    #0  0x0000000000468fe8 flow_wildcards_init_for_packet lib/flow.c:1275
    #1  0x0000000000436c0b upcall_cb ofproto/ofproto-dpif-upcall.c:1231
    #2  0x000000000045bd96 dp_netdev_upcall lib/dpif-netdev.c:3857
    #3  0x0000000000461bf3 dp_execute_userspace_action lib/dpif-netdev.c:4388
    #4  dp_execute_cb lib/dpif-netdev.c:4521
    #5  0x0000000000486ae2 odp_execute_actions lib/odp-execute.c:538
    #6  0x00000000004607f9 dp_netdev_execute_actions lib/dpif-netdev.c:4627
    #7  packet_batch_per_flow_execute lib/dpif-netdev.c:3927
    #8  dp_netdev_input__ lib/dpif-netdev.c:4229
    #9  0x0000000000460ba8 dp_netdev_input lib/dpif-netdev.c:4238
    #10 dp_netdev_process_rxq_port lib/dpif-netdev.c:2873
    #11 0x000000000046126e dpif_netdev_run lib/dpif-netdev.c:3000
    #12 0x000000000042baf5 type_run ofproto/ofproto-dpif.c:504
    #13 0x00000000004192bf ofproto_type_run ofproto/ofproto.c:1687
    #14 0x0000000000409965 bridge_run__ vswitchd/bridge.c:2875
    #15 0x000000000040f145 bridge_run vswitchd/bridge.c:2938
    #16 0x00000000004062e5 main vswitchd/ovs-vswitchd.c:111

Signed-off-by: nickcooper-zhangtonghao <nic@opencloud.tech>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-12-07 10:04:04 -08:00
+								    if (OVS_UNLIKELY(!megaflow && wc)) {
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								        flow_wildcards_init_for_packet(wc, flow);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    }
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								    if (!should_install_flow(udpif, &upcall)) {
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        error = ENOSPC;
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        goto out;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    }
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) {
 								        error = ENOSPC;
 								    }
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								out:
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    if (!error) {
 								        upcall.ukey_persists = true;
 								    }
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								    upcall_uninit(&upcall);
 								    return error;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								}
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								/* Locates the datapath actions that go with 'upcall' and stores a pointer
 								 * to them in '*actions'.
 								 *
 								 * Actions carried by the upcall itself are used when they are non-empty;
 								 * otherwise the ukey matching the upcall's ufid and pmd_id is looked up and
 								 * its actions are used instead.
 								 *
 								 * Returns the length of the actions, or 0 if neither source supplied any.
 								 * '*actions' is only meaningful when the return value is nonzero. */
 								static size_t
 								dpif_get_actions(struct udpif *udpif, struct upcall *upcall,
 								                 const struct nlattr **actions)
 								{
 								    size_t len = 0;
 								    if (upcall->actions) {
 								        /* The datapath attached the actions to the upcall itself. */
 								        *actions = nl_attr_get(upcall->actions);
 								        len = nl_attr_get_size(upcall->actions);
 								    }
 								    if (len != 0) {
 								        return len;
 								    }
 								    /* Nothing usable in the upcall: consult the userspace cache. */
 								    struct udpif_key *cached = ukey_lookup(udpif, upcall->ufid,
 								                                           upcall->pmd_id);
 								    if (cached) {
 								        ukey_get_actions(cached, actions, &len);
 								    }
 								    return len;
 								}
 								static size_t
 								dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
 								                  const struct flow *flow, enum upcall_type type,
 								                  void *upcall_data)
 								{
 								    const struct nlattr *actions = NULL;
 								    size_t actions_len = dpif_get_actions(udpif, upcall, &actions);
 								    if (!actions || !actions_len) {
 								        return 0;
 								    }
 								    switch (type) {
 								    case SFLOW_UPCALL:
-												ofproto-dpif-sflow: Recursively examine actions inside clone.

Until now, dpif_sflow_read_actions() has ignored actions inside clone.
This means that sflow missed tnl_push actions inside clone, which OVS
now uses to avoid tx recirculation.  This commit fixes the problem
by making dpif_sflow_read_actions() recursively process actions inside
clone.

In addition, some sflow data needs to be stored and restored in
ofproto-dpif-xlate when native_tunnel_output() is invoked. Otherwise the
output action of underlay bridge is getting counted too when sFlow is set
on the overlay bridge.

Both bugs are connected to sflows and were introduced by the commit in
the "Fixes:" tag below.

Signed-off-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
CC: Sugesh Chandran <sugesh.chandran@intel.com>
Fixes: 7c12dfc527a5 ("tunneling: Avoid datapath-recirc by combining recirc actions at xlate.")
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-09 19:54:31 +01:00
+								        dpif_sflow_read_actions(flow, actions, actions_len, upcall_data, true);
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								        break;
 								    case FLOW_SAMPLE_UPCALL:
 								    case IPFIX_UPCALL:
 								        dpif_ipfix_read_actions(flow, actions, actions_len, upcall_data);
 								        break;
 								    case BAD_UPCALL:
 								    case MISS_UPCALL:
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    case SLOW_PATH_UPCALL:
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								    case CONTROLLER_UPCALL:
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								    default:
 								        break;
 								    }
 								    return actions_len;
 								}
-												ofproto-dpif-upcall: Fix sparse warnings.

Fixes these warnings from "sparse":

../ofproto/ofproto-dpif-upcall.c:761:1: warning: symbol 'free_upcall' was
    not declared. Should it be static?
../ofproto/ofproto-dpif-upcall.c:849:1: warning: symbol 'convert_upcall'
    was not declared. Should it be static?

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2014-07-26 12:19:03 -07:00
+								static int
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								process_upcall(struct udpif *udpif, struct upcall *upcall,
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								               struct ofpbuf *odp_actions, struct flow_wildcards *wc)
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								{
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet makes use of ofpbuf for managing packet
buffers. That complicates ofpbuf. By making dp-packet
independent of ofpbuf, both libraries can be optimized for
their own use cases.
This avoids mapping operations between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    const struct dp_packet *packet = upcall->packet;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    const struct flow *flow = upcall->flow;
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								    size_t actions_len = 0;
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    switch (upcall->type) {
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    case MISS_UPCALL:
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								    case SLOW_PATH_UPCALL:
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								        upcall_xlate(udpif, upcall, odp_actions, wc);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        return 0;
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    case SFLOW_UPCALL:
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        if (upcall->sflow) {
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted,  but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								            struct dpif_sflow_actions sflow_actions;
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted,  but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								            memset(&sflow_actions, 0, sizeof sflow_actions);
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								            actions_len = dpif_read_actions(udpif, upcall, flow,
 								                                            upcall->type, &sflow_actions);
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								            dpif_sflow_received(upcall->sflow, packet, flow,
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								                                flow->in_port.odp_port, &upcall->cookie,
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted,  but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								                                actions_len > 0 ? &sflow_actions : NULL);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								        }
 								        break;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    case IPFIX_UPCALL:
 								    case FLOW_SAMPLE_UPCALL:
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        if (upcall->ipfix) {
-												ipfix: Support tunnel information for Flow IPFIX.

Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
    1) Create a new record in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
    2) Add IPFIX configuration which is referred by corresponding
       row in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- set Flow_Sample_Collector_Set
       "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
       targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
       cache_active_timeout=60 cache_max_flows=13'
    3) Add sample action to the flows:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,
       obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
       obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
sampling_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.

This patch mainly does four tasks:
    1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
       tunnel information. The NXAST_SAMPLE2 action has a newly added field,
       'sampling_port'.
    2) Use 'other_config:enable-tunnel-sampling' to enable or disable
       exporting tunnel information.
    3) If 'sampling_port' is equal to output port and output port is a tunnel
       port, the translation of OpenFlow "sample" action should first emit
       set(tunnel(...)), then the sample action itself. It makes sure the
       egress tunnel information can be sampled.
    4) Add a test of flow-based IPFIX for tunnel set.

How to test flow-based IPFIX:
    1) Setup a test environment with two Linux host with Docker supported
    2) Create a Docker container and a GRE tunnel port on each host
    3) Use ovs-docker to add the container on the bridge
    4) Listen on port 4739 on the collector machine and use wireshark to filter
       'cflow' packets.
    5) Configure flow-based IPFIX:
       - 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
       - 'ovs-vsctl -- set Flow_Sample_Collector_Set
          "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
          targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
          other_config:enable-tunnel-sampling=true'
       - 'ovs-ofctl add-flow mybridge in_port=1,
          actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
          obs_point_id=456,sampling_port=3')',output:3'
       Note: The in-port is container port. The output port and sampling_port
             are both open flow port and the output port is a GRE tunnel port.
    6) Ping from the container whose host enabled flow-based IPFIX.
    7) Get the IPFIX template packets and IPFIX information packets.

Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-06-14 16:53:34 +08:00
+								            struct flow_tnl output_tunnel_key;
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								            struct dpif_ipfix_actions ipfix_actions;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												ofproto-dpif-ipfix: add support for per-flow drop counters

Patch based on RFC 5102, section 5.10. It implements per-flow drop counters:
- droppedPacketDeltaCount
- droppedPacketTotalCount
- droppedOctetDeltaCount
- droppedOctetTotalCount

In order to determine if packet is going to be dropped, flow actions associated
with packet are read. If at least one of the following conditions is met,
packet is not marked as dropped.

 Packet has at least one:
 - OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_CLONE action with nested OVS_ACTION_ATTR_OUTPUT action
 - OVS_ACTION_ATTR_SAMPLE action with nested OVS_ACTION_ATTR_OUTPUT action and
   sampling probability is set to 100%

Signed-off-by: Przemyslaw Szczerbik <przemyslawx.szczerbik@intel.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-28 07:17:44 +01:00
+								            memset(&ipfix_actions, 0, sizeof ipfix_actions);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												ipfix: Support tunnel information for Flow IPFIX.

Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
    1) Create a new record in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
    2) Add IPFIX configuration which is referred by corresponding
       row in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- set Flow_Sample_Collector_Set
       "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
       targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
       cache_active_timeout=60 cache_max_flows=13'
    3) Add sample action to the flows:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,
       obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
       obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
sampling_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.

This patch mainly does four tasks:
    1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
       tunnel information. The NXAST_SAMPLE2 action has a newly added field,
       'sampling_port'.
    2) Use 'other_config:enable-tunnel-sampling' to enable or disable
       exporting tunnel information.
    3) If 'sampling_port' is equal to output port and output port is a tunnel
       port, the translation of OpenFlow "sample" action should first emit
       set(tunnel(...)), then the sample action itself. It makes sure the
       egress tunnel information can be sampled.
    4) Add a test of flow-based IPFIX for tunnel set.

How to test flow-based IPFIX:
    1) Setup a test environment with two Linux host with Docker supported
    2) Create a Docker container and a GRE tunnel port on each host
    3) Use ovs-docker to add the container on the bridge
    4) Listen on port 4739 on the collector machine and use wireshark to filter
       'cflow' packets.
    5) Configure flow-based IPFIX:
       - 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
       - 'ovs-vsctl -- set Flow_Sample_Collector_Set
          "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
          targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
          other_config:enable-tunnel-sampling=true'
       - 'ovs-ofctl add-flow mybridge in_port=1,
          actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
          obs_point_id=456,sampling_port=3')',output:3'
       Note: The in-port is container port. The output port and sampling_port
             are both open flow port and the output port is a GRE tunnel port.
    6) Ping from the container whose host enabled flow-based IPFIX.
    7) Get the IPFIX template packets and IPFIX information packets.

Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-06-14 16:53:34 +08:00
+								            if (upcall->out_tun_key) {
-												ofproto-dpif-upcall: Check odp_tun_key_from_attr() return value.

In the IPFIX and flow sample upcall handling, check the validity
of the tunnel key returned by odp_tun_key_from_attr(). If the
tunnel key is invalid, return an error.

This was reported by Coverity, but the change also improves
robustness and avoids undefined behavior in the case of malformed
tunnel attributes.

Fixes: 8b7ea2d48033 ("Extend OVS IPFIX exporter to export tunnel headers")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2025-06-05 16:51:27 +02:00
+								                if (odp_tun_key_from_attr(upcall->out_tun_key,
 								                                          &output_tunnel_key,
 								                                          NULL) == ODP_FIT_ERROR) {
 								                    /* The egress tunnel attribute could not be parsed,
 								                     * so 'output_tunnel_key' must not be handed to the
 								                     * IPFIX sampler; reject the upcall instead. */
 								                    return EINVAL;
 								                }
-												ipfix: Support tunnel information for Flow IPFIX.

Add support to export tunnel information for flow-based IPFIX.
The original steps to configure flow level IPFIX:
    1) Create a new record in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
    2) Add IPFIX configuration which is referred by corresponding
       row in Flow_Sample_Collector_Set table:
       'ovs-vsctl -- set Flow_Sample_Collector_Set
       "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX
       targets=\"IP:4739\" obs_domain_id=123 obs_point_id=456
       cache_active_timeout=60 cache_max_flows=13'
    3) Add sample action to the flows:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,
       obs_domain_id=123,obs_point_id=456')',output:3'
NXAST_SAMPLE action was used in step 3. In order to support exporting tunnel
information, the NXAST_SAMPLE2 action was added and with NXAST_SAMPLE2 action
in this patch, the step 3 should be configured like below:
       'ovs-ofctl add-flow mybridge in_port=1,
       actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
       obs_point_id=456,sampling_port=3')',output:3'
'sampling_port' can be equal to ingress port or one of egress ports. If sampling
port is equal to output port and the output port is a tunnel port,
OVS_USERSPACE_ATTR_EGRESS_TUN_PORT will be set in the datapath flow sample action.
When flow sample action upcall happens, tunnel information will be retrieved from
the datapath and then IPFIX can export egress tunnel port information. If
sampling_port=65535 (OFPP_NONE), flow-based IPFIX will keep the same behavior
as before.

This patch mainly does four tasks:
    1) Add a new flow sample action NXAST_SAMPLE2 to support exporting
       tunnel information. The NXAST_SAMPLE2 action has a newly added field,
       'sampling_port'.
    2) Use 'other_config:enable-tunnel-sampling' to enable or disable
       exporting tunnel information.
    3) If 'sampling_port' is equal to output port and output port is a tunnel
       port, the translation of OpenFlow "sample" action should first emit
       set(tunnel(...)), then the sample action itself. It makes sure the
       egress tunnel information can be sampled.
    4) Add a test of flow-based IPFIX for tunnel set.

How to test flow-based IPFIX:
    1) Setup a test environment with two Linux host with Docker supported
    2) Create a Docker container and a GRE tunnel port on each host
    3) Use ovs-docker to add the container on the bridge
    4) Listen on port 4739 on the collector machine and use wireshark to filter
       'cflow' packets.
    5) Configure flow-based IPFIX:
       - 'ovs-vsctl -- create Flow_Sample_Collector_Set id=1 bridge="Bridge UUID"'
       - 'ovs-vsctl -- set Flow_Sample_Collector_Set
          "Flow_Sample_Collector_Set UUID" ipfix=@i -- --id=@i create IPFIX \
          targets=\"IP:4739\" cache_active_timeout=60 cache_max_flows=13 \
          other_config:enable-tunnel-sampling=true'
       - 'ovs-ofctl add-flow mybridge in_port=1,
          actions=sample'('probability=65535,collector_set_id=1,obs_domain_id=123,
          obs_point_id=456,sampling_port=3')',output:3'
       Note: The in-port is container port. The output port and sampling_port
             are both open flow port and the output port is a GRE tunnel port.
    6) Ping from the container whose host enabled flow-based IPFIX.
    7) Get the IPFIX template packets and IPFIX information packets.

Signed-off-by: Benli Ye <daniely@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-06-14 16:53:34 +08:00
+								            }
-												ofproto-dpif: Reorganize upcall handling.

    - This reduces the number of times upcall cookies are processed.
    - It separates true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-09-29 17:44:08 -07:00
+								            actions_len = dpif_read_actions(udpif, upcall, flow,
 								                                            upcall->type, &ipfix_actions);
-												ofproto-dpif: Modify process_upcall() to remove some redundant code.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-04 14:01:31 -08:00
+								            if (upcall->type == IPFIX_UPCALL) {
 								                dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
 								                                         flow->in_port.odp_port,
 								                                         upcall->cookie.ipfix.output_odp_port,
 								                                         upcall->out_tun_key ?
 								                                             &output_tunnel_key : NULL,
 								                                         actions_len > 0 ?
 								                                             &ipfix_actions: NULL);
 								            } else {
 								                /* The flow reflects exactly the contents of the packet.
 								                 * Sample the packet using it. */
 								                dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
 								                                       &upcall->cookie, flow->in_port.odp_port,
 								                                       upcall->out_tun_key ?
 								                                           &output_tunnel_key : NULL,
 								                                       actions_len > 0 ? &ipfix_actions: NULL);
 								            }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								        }
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								        break;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								    case CONTROLLER_UPCALL:
 								        {
 								            struct user_action_cookie *cookie = &upcall->cookie;
 								            if (cookie->controller.dont_send) {
 								                return 0;
 								            }
 								            uint32_t recirc_id = cookie->controller.recirc_id;
 								            if (!recirc_id) {
 								                break;
 								            }
 								            const struct recirc_id_node *recirc_node
 								                                = recirc_id_node_find(recirc_id);
 								            if (!recirc_node) {
 								                break;
 								            }
-												ofproto-dpif: Don't slow-path controller actions with pause.

A previous patch removed slow-pathing for controller actions with the
exception of ones that specified "pause".  This commit removes that
restriction so that no controller actions are slow-pathed.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-18 23:16:22 -07:00
+								            const struct frozen_state *state = &recirc_node->state;
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								            struct ofproto_async_msg *am = xmalloc(sizeof *am);
 								            *am = (struct ofproto_async_msg) {
 								                .controller_id = cookie->controller.controller_id,
 								                .oam = OAM_PACKET_IN,
 								                .pin = {
 								                    .up = {
 								                        .base = {
 								                            .packet = xmemdup(dp_packet_data(packet),
 								                                              dp_packet_size(packet)),
 								                            .packet_len = dp_packet_size(packet),
 								                            .reason = cookie->controller.reason,
-												ofproto-dpif: Don't slow-path controller actions with pause.

A previous patch removed slow-pathing for controller actions with the
exception of ones that specified "pause".  This commit removes that
restriction so that no controller actions are slow-pathed.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-18 23:16:22 -07:00
+								                            .table_id = state->table_id,
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								                            .cookie = get_32aligned_be64(
 								                                         &cookie->controller.rule_cookie),
 								                            .userdata = (recirc_node->state.userdata_len
 								                                     ? xmemdup(recirc_node->state.userdata,
 								                                               recirc_node->state.userdata_len)
 								                                      : NULL),
 								                            .userdata_len = recirc_node->state.userdata_len,
 								                        },
 								                    },
 								                    .max_len = cookie->controller.max_len,
 								                },
 								            };
-												ofproto-dpif: Don't slow-path controller actions with pause.

A previous patch removed slow-pathing for controller actions with the
exception of ones that specified "pause".  This commit removes that
restriction so that no controller actions are slow-pathed.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-18 23:16:22 -07:00
+								            if (cookie->controller.continuation) {
 								                am->pin.up.stack = (state->stack_size
 								                          ? xmemdup(state->stack, state->stack_size)
 								                          : NULL),
 								                am->pin.up.stack_size = state->stack_size,
 								                am->pin.up.mirrors = state->mirrors,
 								                am->pin.up.conntracked = state->conntracked,
 								                am->pin.up.actions = (state->ofpacts_len
 								                            ? xmemdup(state->ofpacts,
 								                                      state->ofpacts_len) : NULL),
 								                am->pin.up.actions_len = state->ofpacts_len,
 								                am->pin.up.action_set = (state->action_set_len
 								                               ? xmemdup(state->action_set,
 								                                         state->action_set_len)
 								                               : NULL),
 								                am->pin.up.action_set_len = state->action_set_len,
 								                am->pin.up.bridge = upcall->ofproto->uuid;
-												ofproto-dpif: Fix continuation with patch port

This patch fixes the ofp_port to odp_port translation issue on patch
port with nxt_resume.  When OVS resumes processing a packet from
nxt_resume, OVS does not translate the ofp in_port to odp in_port
correctly if the packet is originally received from a patch port.
Currently, OVS sets the odp in_port for this resumed packet to ODPP_NONE
and pushes the resumed packet back to the datapath. Later on, if the packet
goes through a recirc, OVS will generate the following message since it
cannot translate odp in_port (ODPP_NONE) back to ofp in_port during upcall,
and pushes down a datapath rule to drop the packet.

    ofproto_dpif_upcall(handler16)|INFO|received packet on unassociated
        datapath port 4294967295

When OVS revalidates the drop datapath flow with ODPP_NONE in_port, we
will see the following warning.
    ofproto_dpif_upcall(revalidator18)|WARN|Failed to acquire udpif_key
        corresponding to unexpected flow (Invalid argument): ufid:....

This patch resolves this issue by storing the odp in_port in the
continuation messages and restoring the odp in_port before pushing the
packet back to the datapath.

VMWare-BZ: 2364696
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-06-21 10:51:23 -07:00
+								                am->pin.up.odp_port = upcall->packet->md.in_port.odp_port;
-												ofproto-dpif: Don't slow-path controller actions with pause.

A previous patch removed slow-pathing for controller actions with the
exception of ones that specified "pause".  This commit removes that
restriction so that no controller actions are slow-pathed.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-18 23:16:22 -07:00
+								            }
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								            /* We don't want to use the upcall 'flow', since it may be
 								             * more specific than the point at which the "controller"
 								             * action was specified. */
 								            struct flow frozen_flow;
 								            frozen_flow = *flow;
-												ofproto-dpif: Don't slow-path controller actions with pause.

A previous patch removed slow-pathing for controller actions with the
exception of ones that specified "pause".  This commit removes that
restriction so that no controller actions are slow-pathed.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-18 23:16:22 -07:00
+								            if (!state->conntracked) {
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								                flow_clear_conntrack(&frozen_flow);
 								            }
-												tun_metadata: Fix coredump caused by use-after-free bug

Tun_metadata can be referenced by flow and frozen_state at the same
time. When ovs-vswitchd handles TLV table mod message, the involved
tun_metadata gets freed. The call trace to free tun_metadata is
shown as below:

ofproto_run
- handle_openflow
  - handle_single_part_openflow
    - handle_tlv_table_mod
      - tun_metadata_table_mod
        - tun_metadata_postpone_free

Unfortunately, this tun_metadata can be still used by some frozen_state,
and later on when frozen_state tries to access its tun_metadata table,
ovs-vswitchd crashes. The call trace to access tun_metadata from
frozen_state is shown as below:

udpif_upcall_handler
- recv_upcalls
  - process_upcall
    - frozen_metadata_to_flow

It is unsafe for frozen_state to reference tun_table because tun_table
is protected by RCU while the lifecycle of frozen_state can span several
RCU quiesce states. Current code violates OVS's RCU protection mechanism.

This patch fixes it by simply stopping frozen_state from referencing
tun_table. If frozen_state needs tun_table, the latest valid tun_table
can be found through ofproto_get_tun_tab() efficiently.

A previous commit seems to have fixed a similar issue:
254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by tun_table)

VMware-BZ: #2526222
Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-09 11:37:38 -07:00
+								            frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
 								                                    &frozen_flow);
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								            flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);
 								            ofproto_dpif_send_async_msg(upcall->ofproto, am);
 								        }
 								        break;
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								    case BAD_UPCALL:
 								        break;
 								    }
-												ofproto-dpif: Move special upcall handling into ofproto-dpif-upcall.

Both the IPFIX and SFLOW modules are thread safe, so there's no
particular reason to pass them up to the main thread.  Eliminating
this step significantly simplifies the code.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 15:04:04 -07:00
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								    return EAGAIN;
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								}
 								static void
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								handle_upcalls(struct udpif *udpif, struct upcall *upcalls,
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								               size_t n_upcalls)
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								{
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								    struct dpif_op *opsp[UPCALL_MAX_BATCH * 2];
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								    struct ukey_op ops[UPCALL_MAX_BATCH * 2];
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    size_t n_ops, n_opsp, i;
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
-												ofproto: Remove per-flow miss hash table from upcall handler.

The upcall handler keeps a hash table which hashes flow to a list
of corresponding packets.  This used to be necessary as packets with
the same flow had similar actions and calculating actions used to be
a performance bottleneck.  Now that userspace action calculation
performance has improved, there is no need for this hash map.

This patch removes this hash map and each packet has its own upcall.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-05-20 21:50:19 -07:00
+								    /* Handle the packets individually in order of arrival.
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
+								     *
-												ofproto-dpif: Update handle_action() comment.

The comment didn't mention the SLOW_LLDP and SLOW_ACTION reasons.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-02-16 17:11:54 -08:00
+								     *   - For SLOW_CFM, SLOW_LACP, SLOW_STP, SLOW_BFD, and SLOW_LLDP,
 								     *     translation is what processes received packets for these
 								     *     protocols.
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
+								     *
-												ofproto-dpif: Update handle_action() comment.

The comment didn't mention the SLOW_LLDP and SLOW_ACTION reasons.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-02-16 17:11:54 -08:00
+								     *   - For SLOW_ACTION, translation executes the actions directly.
 								     *
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
+								     * The loop fills 'ops' with an array of operations to execute in the
 								     * datapath. */
 								    n_ops = 0;
-												ofproto-dpif-upcall: Remove the dispatcher thread.

With the foundation laid in previous commits, this commit
removes the 'dispatcher' thread by allowing 'handler'
threads to read upcalls directly from dpif.

This commit significantly simplifies the flow miss handling
code and brings slight improvement to flow setup rate.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>


											
										
										
											2014-02-26 23:03:24 -08:00
+								    for (i = 0; i < n_upcalls; i++) {
 								        struct upcall *upcall = &upcalls[i];
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								        const struct dp_packet *packet = upcall->packet;
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								        struct ukey_op *op;
-												ofproto-dpif: Fix a vlan-splinter megaflow bug

When vlan-splinter is enabled, ovs receives non-vlan flows from the
kernel vlan ports, vlan tag is then added to the incoming flow before
xlating, so that they look like those received from a trunk port.

In case megaflow is enabled, xlating may set vlan masks during rule
processing as usual. If those vlan masks were serialized and downloaded
to the kernel (this bug), those mega flows will be rejected due to
unexpected vlan mask encapsulation, since the original kernel flows do
not have vlan tags. This bug does not break connectivity, but impacts
performance since all traffic received on vlan splinter ports will now
be handled by vswitchd, as no datapath flows can be successfully
installed.

This fix is to make sure no vlan mask encapsulation is generated for
the datapath flow if its in_port was re-written by vlan-splinter
receiving logic.

Bug #22567

Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-07 00:17:25 -08:00
-												upcall: Log failure to flow_put for dpif-netlink.

Previously these errors were only logged for dpif-netdev. Make it
consistent by merging the code for both datapaths.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-18 14:50:09 -07:00
+								        if (should_install_flow(udpif, upcall)) {
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								            struct udpif_key *ukey = upcall->ukey;
-												ofproto-dpif: Fix a vlan-splinter megaflow bug

When vlan-splinter is enabled, ovs receives non-vlan flows from the
kernel vlan ports, vlan tag is then added to the incoming flow before
xlating, so that they look like those received from a trunk port.

In case megaflow is enabled, xlating may set vlan masks during rule
processing as usual. If those vlan masks were serialized and downloaded
to the kernel (this bug), those mega flows will be rejected due to
unexpected vlan mask encapsulation, since the original kernel flows do
not have vlan tags. This bug does not break connectivity, but impacts
performance since all traffic received on vlan splinter ports will now
be handled by vswitchd, as no datapath flows can be successfully
installed.

This fix is to make sure no vlan mask encapsulation is generated for
the datapath flow if its in_port was re-written by vlan-splinter
receiving logic.

Bug #22567

Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-01-07 00:17:25 -08:00
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            if (ukey_install(udpif, ukey)) {
-												upcall: Only init flow_put if ukey is installed.

Currently when processing a batch of upcalls, all datapath operations
are first initialized, then later the corresponding ukeys are installed.
If the ukey_install fails at this later point, then the code needs to
backtrack a bit to delete the ukey and skip using the initialized
datapath op.

It's a little simpler to only initialize the datapath operation if the
ukey could actually be installed. The locks are held longer, but these
locks aren't heavily contended and the extended holding of the lock will
be removed in a subsequent patch anyway.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:03 -07:00
+								                upcall->ukey_persists = true;
 								                put_op_init(&ops[n_ops++], ukey, DPIF_FP_CREATE);
 								            }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												ofproto-dpif-xlate: Make xlate_actions() caller supply action buffer.

Until now, struct xlate_out has embedded an ofpbuf for actions and a large
stub for it, which xlate_actions() filled in during the flow translation
process.  This commit removes the embedded ofpbuf and stub, instead putting a
pointer to an ofpbuf into struct xlate_in, for a caller to fill in with a
pointer to its own structure if desired.  (If none is supplied,
xlate_actions() uses an internal scratch buffer and destroys it before
returning.)

This commit eliminates the last large data structure from
struct xlate_out, making the initialization of an entire xlate_out at
the beginning of xlate_actions() now reasonable.  More members will be
eliminated in upcoming commits, but this is no longer essential.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:34:16 -07:00
+								        if (upcall->odp_actions.size) {
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
+								            op = &ops[n_ops++];
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								            op->ukey = NULL;
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								            op->dop.type = DPIF_OP_EXECUTE;
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								            op->dop.execute.packet = CONST_CAST(struct dp_packet *, packet);
 								            op->dop.execute.flow = upcall->flow;
-												userspace: Switching of L3 packets in L2 pipeline

Ports have a new layer3 attribute if they send/receive L3 packets.

The packet_type included in structs dp_packet and flow is considered in
ofproto-dpif. The classical L2 match fields (dl_src, dl_dst, dl_type, and
vlan_tci, vlan_vid, vlan_pcp) now have Ethernet as pre-requisite.

A dummy ethernet header is pushed to L3 packets received from L3 ports
before the pipeline processing starts. The ethernet header is popped
before sending a packet to a L3 port.

For datapath ports that can receive L2 or L3 packets, the packet_type
becomes part of the flow key for datapath flows and is handled
appropriately in dpif-netdev.

In the 'else' branch in flow_put_on_pmd() function, the additional check
flow_equal(&match.flow, &netdev_flow->flow) was removed, as a) the dpcls
lookup is sufficient to uniquely identify a flow and b) it caused false
negatives because the flow in netdev->flow may not be properly masked.

In dpif_netdev_flow_put() we now use the same method for constructing the
netdev_flow_key as the one used when adding the flow to the dpcls to make sure
these always match. The function netdev_flow_key_from_flow() used so far was
not only inefficient but sometimes caused mismatches and subsequent flow
update failures.

The kernel datapath does not support the packet_type match field.
Instead it encodes the packet type implicitly by the presence or absence of
the Ethernet attribute in the flow key and mask.
This patch filters the PACKET_TYPE attribute out of netlink flow key and
mask to be sent to the kernel datapath.

Signed-off-by: Lorand Jakab <lojakab@cisco.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-02 16:16:17 +00:00
+								            odp_key_to_dp_packet(upcall->key, upcall->key_len,
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								                                 op->dop.execute.packet);
 								            op->dop.execute.actions = upcall->odp_actions.data;
 								            op->dop.execute.actions_len = upcall->odp_actions.size;
 								            op->dop.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0;
 								            op->dop.execute.probe = false;
 								            op->dop.execute.mtu = upcall->mru;
-												ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may send an upcall with hash info,
ovs-vswitchd should get it from upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-15 10:58:59 +08:00
+								            op->dop.execute.hash = upcall->hash;
-												dpif-netlink: Provide original upcall pid in 'execute' commands.

When a packet enters kernel datapath and there is no flow to handle it,
packet goes to userspace through a MISS upcall.  With per-CPU upcall
dispatch mechanism, we're using the current CPU id to select the
Netlink PID on which to send this packet.  This allows us to send
packets from the same traffic flow through the same handler.

The handler will process the packet, install required flow into the
kernel and re-inject the original packet via OVS_PACKET_CMD_EXECUTE.

While handling OVS_PACKET_CMD_EXECUTE, however, we may hit a
recirculation action that will pass the (likely modified) packet
through the flow lookup again.  And if the flow is not found, the
packet will be sent to userspace again through another MISS upcall.

However, the handler thread in userspace is likely running on a
different CPU core, and the OVS_PACKET_CMD_EXECUTE request is handled
in the syscall context of that thread.  So, when the time comes to
send the packet through another upcall, the per-CPU dispatch will
choose a different Netlink PID, and this packet will end up processed
by a different handler thread on a different CPU.

The process continues as long as there are new recirculations, each
time the packet goes to a different handler thread before it is sent
out of the OVS datapath to the destination port.  In real setups the
number of recirculations can go up to 4 or 5, sometimes more.

There is always a chance to re-order packets while processing upcalls,
because userspace will first install the flow and then re-inject the
original packet.  So, there is a race window when the flow is already
installed and the second packet can match it inside the kernel and be
forwarded to the destination before the first packet is re-injected.
But the fact that packets are going through multiple upcalls handled
by different userspace threads makes the reordering noticeably more
likely, because we not only have a race between the kernel and a
userspace handler (which is hard to avoid), but also between multiple
userspace handlers.

For example, let's assume that 10 packets got enqueued through a MISS
upcall for handler-1, it will start processing them, will install the
flow into the kernel and start re-injecting packets back, from where
they will go through another MISS to handler-2.  Handler-2 will install
the flow into the kernel and start re-injecting the packets, while
handler-1 continues to re-inject the last of the 10 packets, they will
hit the flow installed by handler-2 and be forwarded without going to
the handler-2, while handler-2 still re-injects the first of these 10
packets.  Given multiple recirculations and misses, these 10 packets
may end up completely mixed up on the output from the datapath.

Let's provide the original upcall PID via the new netlink attribute
OVS_PACKET_ATTR_UPCALL_PID.  This way the upcall triggered during the
execution will go to the same handler.  Packets will be enqueued to
the same socket and re-injected in the same order.  This doesn't
eliminate re-ordering as stated above, since we still have a race
between the kernel and the handler thread, but it allows to eliminate
races between multiple handlers.

The openvswitch kernel module ignores unknown attributes for the
OVS_PACKET_CMD_EXECUTE, so it's safe to provide it even on older
kernels.

Reported-at: https://issues.redhat.com/browse/FDP-1479
Link: https://lore.kernel.org/netdev/20250702155043.2331772-1-i.maximets@ovn.org/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2025-07-08 13:34:02 +02:00
+								            op->dop.execute.upcall_pid = upcall->pid;
-												ofproto-dpif-upcall: Forward packets in order of arrival.

Until now, the code in ofproto-dpif-upcall (and the code that preceded it
in ofproto-dpif) obtained a batch of incoming packets, inserted them into
a hash table based on hashes of their flows, processed them, and then
forwarded them in hash order.  Usually this maintains order within a single
network connection, but because OVS's notion of a flow is so fine-grained,
it can reorder packets within (e.g.) a TCP connection if two packets
handled in a single batch have (e.g.) different ECN values.

This commit fixes the problem by making ofproto-dpif-upcall always forward
packets in the same order they were received.

This is far from the minimal change necessary to avoid reordering packets.
I think that the code is easier to understand afterward.

Reported-by: Dmitry Fleytman <dfleytma@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2013-09-19 11:03:47 -07:00
+								        }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								    }
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    /* Execute batch. */
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    n_opsp = 0;
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								    for (i = 0; i < n_ops; i++) {
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        opsp[n_opsp++] = &ops[i].dop;
 								    }
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    dpif_operate(udpif->dpif, opsp, n_opsp, DPIF_OFFLOAD_AUTO);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    for (i = 0; i < n_ops; i++) {
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        struct udpif_key *ukey = ops[i].ukey;
 								        if (ukey) {
 								            ovs_mutex_lock(&ukey->mutex);
 								            if (ops[i].dop.error) {
 								                transition_ukey(ukey, UKEY_EVICTED);
-												ofproto-dpif-upcall: Fix flow setup/delete race.

If a handler thread takes a long time to set up a set of flows, it is
possible for one of the installed flows to be dumped and scheduled
for deletion by a revalidator thread before the handler is able to
transition the ukey into an operational state---Between the
dpif_operate() above this function and the ukey lock / transition logic
modified by this patch.

Only transition the ukey for the flow if it wasn't already transitioned
to a later state by a revalidator thread.

Fixes: 54ebeff4c03d ("upcall: Track ukey states.")
Reported-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Tested-by: Paul Blakey <paulb@mellanox.com>

											
										
										
											2017-03-20 14:08:19 -07:00
+								            } else if (ukey->state < UKEY_OPERATIONAL) {
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								                transition_ukey(ukey, UKEY_OPERATIONAL);
 								            }
 								            ovs_mutex_unlock(&ukey->mutex);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        }
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for by keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about its headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								    }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								static uint32_t
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								get_ukey_hash(const ovs_u128 *ufid, const unsigned pmd_id)
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								{
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    return hash_2words(ufid->u32[0], pmd_id);
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static struct udpif_key *
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid, const unsigned pmd_id)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
 								    struct udpif_key *ukey;
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    int idx = get_ukey_hash(ufid, pmd_id) % N_UMAPS;
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    struct cmap *cmap = &udpif->ukeys[idx].cmap;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    CMAP_FOR_EACH_WITH_HASH (ukey, cmap_node,
 								                             get_ukey_hash(ufid, pmd_id), cmap) {
-												util: Pass 128-bit arguments directly instead of using pointers.

Commit f2d105b5 (ofproto-dpif-xlate: xlate ct_{mark, label} correctly.)
introduced the ovs_u128_and() function.  It directly takes ovs_u128
values as arguments instead of pointers to them.  As this is a bit more
direct way to deal with 128-bit values, modify the other utility
functions to do the same.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-05-03 18:20:51 -07:00
+								        if (ovs_u128_equals(ukey->ufid, *ufid)) {
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								            return ukey;
 								        }
 								    }
 								    return NULL;
 								}
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
+								/* Provides safe lockless access of RCU protected 'ukey->actions'.  Callers may
 								 * alternatively access the field directly if they take 'ukey->mutex'.
 								 *
 								 * On return, '*actions' points at the data of the ofpbuf currently
 								 * published in 'ukey->actions' and '*size' holds that buffer's size.
 								 * Nothing is copied: '*actions' aliases the buffer's own storage.
 								 *
 								 * The buffer pointer is dereferenced without a NULL check, so
 								 * 'ukey->actions' must already refer to a non-NULL buffer.
 								 *
 								 * NOTE(review): presumably '*actions' stays valid only until the calling
 								 * thread's next RCU quiescent state -- confirm against the callers. */
 								static void
 								ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size)
 								{
 								    const struct ofpbuf *buf = ovsrcu_get(struct ofpbuf *, &ukey->actions);
 								    *actions = buf->data;
 								    *size = buf->size;
 								}
 								/* Replaces 'ukey->actions' with a clone of 'actions'.
 								 *
 								 * The previously published buffer, if any, is not freed here; its
 								 * deletion is deferred through ovsrcu_postpone().  When there is no
 								 * previous buffer, no callback is queued.  'actions' is const and is only
 								 * passed to ofpbuf_clone(), so the caller's buffer is left untouched.
 								 *
 								 * The old pointer is read with ovsrcu_get_protected().  NOTE(review):
 								 * presumably the caller must hold 'ukey->mutex' or otherwise have
 								 * exclusive access to 'ukey' -- confirm against the callers. */
 								static void
 								ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions)
 								{
-												ofproto-dpif-upcall: Only call ovsrcu_postpone() on active actions

Currently, ovsrcu_postpone() is called even with a NULL argument,
i.e. when there is no data to be freed. This is causing additional
overhead because work is scheduled for the urcu thread. This change
avoids adding the postpone callback if no work needs to be done.

This especially helps for the OVS-DPDK case where the PMD threads
might no longer have to do a write() due to the latch_set(), and thus
saving a syscall.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-19 13:24:06 +02:00
+								    struct ofpbuf *old_actions = ovsrcu_get_protected(struct ofpbuf *,
 								                                                      &ukey->actions);
 								    if (old_actions) {
 								        ovsrcu_postpone(ofpbuf_delete, old_actions);
 								    }
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
+								    ovsrcu_set(&ukey->actions, ofpbuf_clone(actions));
 								}
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								static struct udpif_key *
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								ukey_create__(const struct nlattr *key, size_t key_len,
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								              const struct nlattr *mask, size_t mask_len,
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								              bool ufid_present, const ovs_u128 *ufid,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								              const unsigned pmd_id, const struct ofpbuf *actions,
-												ofproto-dpif: Init ukey->dump_seq to zero

In the current implementation the dump_seq of a new datapath flow ukey
is set to seq_read(udpif->dump_seq). This implies that any revalidation
during the current dump_seq period (up to 500 ms) is skipped.

This can trigger incorrect behavior, for example when the creation of a
datapath flow triggers a PACKET_IN to the controller, in response to which
the controller installs a new flow entry that should invalidate the
original datapath flow.

Initializing ukey->dump_seq to zero implies that the first dump of the
flow, be it for revalidation or dumping statistics, will always be
executed as zero is not a valid value of the ovs_seq.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-04 13:26:02 +02:00
+								              uint64_t reval_seq, long long int used,
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								              uint32_t key_recirc_id, struct xlate_out *xout)
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								{
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								    struct udpif_key *ukey = xmalloc(sizeof *ukey);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    memcpy(&ukey->keybuf, key, key_len);
 								    ukey->key = &ukey->keybuf.nla;
 								    ukey->key_len = key_len;
 								    memcpy(&ukey->maskbuf, mask, mask_len);
 								    ukey->mask = &ukey->maskbuf.nla;
 								    ukey->mask_len = mask_len;
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								    ukey->ufid_present = ufid_present;
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    ukey->ufid = *ufid;
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted on all
pmd threads that receive from that interface (except for flow-get,
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								    ukey->pmd_id = pmd_id;
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    ukey->hash = get_ukey_hash(&ukey->ufid, pmd_id);
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
 								    ovsrcu_init(&ukey->actions, NULL);
 								    ukey_set_actions(ukey, actions);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
 								    ovs_mutex_init(&ukey->mutex);
-												ofproto-dpif: Init ukey->dump_seq to zero

In the current implementation the dump_seq of a new datapath flow ukey
is set to seq_read(udpif->dump_seq). This implies that any revalidation
during the current dump_seq period (up to 500 ms) is skipped.

This can trigger incorrect behavior, for example when the creation of a
datapath flow triggers a PACKET_IN to the controller, in response to which
the controller installs a new flow entry that should invalidate the
original datapath flow.

Initializing ukey->dump_seq to zero implies that the first dump of the
flow, be it for revalidation or dumping statistics, will always be
executed as zero is not a valid value of the ovs_seq.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-04 13:26:02 +02:00
+								    ukey->dump_seq = 0;     /* Not yet dumped */
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    ukey->reval_seq = reval_seq;
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    ukey->state = UKEY_CREATED;
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								    ukey->state_thread = ovsthread_id_self();
 								    ukey->state_where = OVS_SOURCE_LOCATOR;
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    ukey->created = ukey->flow_time = time_msec();
-												ofproto-dpif-upcall: Fix use of uninitialized missed dumps counter.

The first time revalidator checks the value - it is not initialized, so
we may end up marking valid flows for deletion.

 WARNING: MemorySanitizer: use-of-uninitialized-value
  0 0x6ee9e9 in revalidator_sweep__ ofproto/ofproto-dpif-upcall.c:3003:25
  1 0x6ed671 in revalidator_purge ofproto/ofproto-dpif-upcall.c:3056:5
  2 0x6e997d in udpif_stop_threads ofproto/ofproto-dpif-upcall.c:566:17
  3 0x6ecf05 in udpif_flush ofproto/ofproto-dpif-upcall.c:756:5
  4 0x60323e in flush ofproto/ofproto-dpif.c:2020:9
  5 0x56b10e in ofproto_flush__ ofproto/ofproto.c:1669:9
  6 0x56a67b in ofproto_destroy ofproto/ofproto.c:1821:5
  7 0x4c9012 in bridge_destroy vswitchd/bridge.c:3644:9
  8 0x4c7c13 in bridge_exit vswitchd/bridge.c:556:9
  9 0x5261a8 in main vswitchd/ovs-vswitchd.c:147:5
 10 0x7fa0bb in __libc_start_call_main
 11 0x7fa0bb in __libc_start_main@GLIBC_2.2.5
 12 0x432b24 in _start (vswitchd/ovs-vswitchd+0x432b24)

Fixes: 180ab2fd635e ("ofproto-dpif-upcall: Avoid stale ukeys leaks.")
Acked-by: Mike Pattrick <mkp@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-11-28 14:18:16 +01:00
+								    ukey->missed_dumps = 0;
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    memset(&ukey->stats, 0, sizeof ukey->stats);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    ukey->stats.used = used;
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								    ukey->dp_layer = NULL;
-												ofproto-dpif-xlate: Cache xlate_actions() effects.

This patch adds a new object called 'struct xlate_cache' which can be
set in 'struct xlate_in', and passed to xlate_actions() to cache the
modules affected by this flow translation. Subsequently, the caller can
pass the xcache to xlate_push_stats() to credit stats and perform side
effects for a lower cost than full flow translation.

These changes are aimed currently at long-lived flows, decreasing the
average dump duration for such flows by 50-80%. This allows more flows
to be supported in the datapath at a given time. Applying these changes
to short-lived flows is left for a later commit.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Add caching for fin_timeout action.
    Expire netflows on xlate_cache_clear().
    Account to bonds using a copy of 'flow' rather than hash.
    Always build XC_NORMAL entry (previously only if may_learn is true)
    Rename xlate_from_cache()->xlate_push_stats()
    Add may_learn parameter to xlate_push_stats()
    Tidy up xlate_actions__() mirror/netflow code.
    Fold in style fixups.
RFC: First post.

											
										
										
											2014-04-10 16:00:28 +12:00
+								    ukey->xcache = NULL;
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								    ukey->offloaded = false;
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows; if that succeeds, the OOR state on the device
is cleared. If some (or all) of the pending flows could not be offloaded,
then in the second phase we replace offloaded flows that have a lower
pps-rate than a pending flow, until there are no more pending flows with
a higher rate than an offloaded flow. The flows that are removed from the
device are added to the kernel datapath.

A new OVS configuration parameter, "offload-rebalance", is added to ovsdb.
Its default value is "false". To enable this feature, set the parameter
to "true", which provides a packets-per-second rate-based policy to
dynamically offload and un-offload flows.

Note: This option can be enabled only when the 'hw-offload' policy is
enabled. It also requires 'tc-policy' to be set to 'skip_sw'; otherwise,
flow offload errors (specifically the ENOSPC error this feature depends on)
reported by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    ukey->in_netdev = NULL;
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								    ukey->flow_packets = ukey->flow_backlog_packets = 0;
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								    ukey->key_recirc_id = key_recirc_id;
 								    recirc_refs_init(&ukey->recircs);
 								    if (xout) {
 								        /* Take ownership of the action recirc id references. */
 								        recirc_refs_swap(&ukey->recircs, &xout->recircs);
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    }
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    return ukey;
 								}
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								static struct udpif_key *
-												ofproto-dpif-xlate: Make xlate_actions() caller supply flow_wildcards.

Until now, struct xlate_out has embedded a struct flow_wildcards, which
xlate_actions() filled in during the flow translation process (unless this
was disabled with xin->skip_wildcards, which in classifier microbenchmarks
saves significant time).  This commit removes the embedded flow_wildcards
and 'skip_wildcards', instead putting a pointer to a flow_wildcards into
struct xlate_in, for a caller to fill in with a pointer to its own
structure if desired.

One reason for this change is performance.  Until now, the userspace slow
path has done a full copy of a struct flow_wildcards for each upcall in
upcall_cb().  This commit eliminates that copy.  I don't know whether this
has a measurable performance impact; it may, because struct flow copies
had a noticeable cost in slow-path stress tests even when struct flow was
half its current size.

This commit also eliminates a large data structure from struct xlate_out,
reducing the cost of the initialization of that structure at the beginning
of xlate_actions().  However, there is more size reduction to come in
later commits.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-07-31 13:15:52 -07:00
+								ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc)
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								{
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    struct odputil_keybuf keystub, maskstub;
 								    struct ofpbuf keybuf, maskbuf;
-												odp-util: Share fields between odp and dpif_backer.

Datapath support for some flow key fields is used inside ofproto-dpif as
well as odp-util. Share these fields using the same structure.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2015-06-30 16:43:03 -07:00
+								    bool megaflow;
-												odp-util: Convert flow serialization parameters to a struct.

Serializing between userspace flows and netlink attributes currently
requires several additional parameters besides the flows themselves.
This will continue to grow in the future as well. This converts
the function arguments to a parameters struct, which makes the code
easier to read and allows irrelevant arguments to be omitted.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2015-06-16 11:15:28 -07:00
+								    struct odp_flow_key_parms odp_parms = {
 								        .flow = upcall->flow,
-												ovs-vswitchd: Avoid segfault for "netdev" datapath.

When a datapath of type "netdev" processes packets in a userspace
action, it may cause a segmentation fault. In
dp_execute_userspace_action(), we pass NULL as the "wc" argument to
dp_netdev_upcall(), but "wc" is used in the dp_netdev_upcall() call
tree. For example, dp_netdev_upcall() uses &wc->masks for debugging,
and flow_wildcards_init_for_packet() uses "wc" if we disable megaflow,
which is described in more detail below.

Segmentation fault in flow_wildcards_init_for_packet:

    #0  0x0000000000468fe8 flow_wildcards_init_for_packet lib/flow.c:1275
    #1  0x0000000000436c0b upcall_cb ofproto/ofproto-dpif-upcall.c:1231
    #2  0x000000000045bd96 dp_netdev_upcall lib/dpif-netdev.c:3857
    #3  0x0000000000461bf3 dp_execute_userspace_action lib/dpif-netdev.c:4388
    #4  dp_execute_cb lib/dpif-netdev.c:4521
    #5  0x0000000000486ae2 odp_execute_actions lib/odp-execute.c:538
    #6  0x00000000004607f9 dp_netdev_execute_actions lib/dpif-netdev.c:4627
    #7  packet_batch_per_flow_execute lib/dpif-netdev.c:3927
    #8  dp_netdev_input__ lib/dpif-netdev.c:4229
    #9  0x0000000000460ba8 dp_netdev_input lib/dpif-netdev.c:4238
    #10 dp_netdev_process_rxq_port lib/dpif-netdev.c:2873
    #11 0x000000000046126e dpif_netdev_run lib/dpif-netdev.c:3000
    #12 0x000000000042baf5 type_run ofproto/ofproto-dpif.c:504
    #13 0x00000000004192bf ofproto_type_run ofproto/ofproto.c:1687
    #14 0x0000000000409965 bridge_run__ vswitchd/bridge.c:2875
    #15 0x000000000040f145 bridge_run vswitchd/bridge.c:2938
    #16 0x00000000004062e5 main vswitchd/ovs-vswitchd.c:111

Signed-off-by: nickcooper-zhangtonghao <nic@opencloud.tech>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-12-07 10:04:04 -08:00
+								        .mask = wc ? &wc->masks : NULL,
-												odp-util: Convert flow serialization parameters to a struct.

Serializing between userspace flows and netlink attributes currently
requires several additional parameters besides the flows themselves.
This will continue to grow in the future as well. This converts
the function arguments to a parameters struct, which makes the code
easier to read and allows irrelevant arguments to be omitted.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2015-06-16 11:15:28 -07:00
+								    };
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
-												ofproto-dpif: Add boottime support field.

When changing support fields, it may be unsafe to set support level
beyond what datapath can support.

This patch introduces the notion of boot time support and
runtime support fields. Boot time support fields are set only
once during the ofproto start up phase, and are not changed during
runtime. The runtime support fields are the same as the boot time
support fields at startup time, but can be changed via
the 'ovs-appctl' command.  However, each change will
be checked against the corresponding boot time support field. Only
feature reduction from the boot time support is allowed.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-06-16 23:39:31 -07:00
+								    odp_parms.support = upcall->ofproto->backer->rt_support.odp;
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    if (upcall->key_len) {
 								        ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len);
 								    } else {
 								        /* dpif-netdev doesn't provide a netlink-formatted flow key in the
 								         * upcall, so convert the upcall's flow here. */
 								        ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub);
-												odp-util: Convert flow serialization parameters to a struct.

Serializing between userspace flows and netlink attributes currently
requires several additional parameters besides the flows themselves.
This will continue to grow in the future as well. This converts
the function arguments to a parameters struct, which makes the code
easier to read and allows irrelevant arguments to be omitted.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2015-06-16 11:15:28 -07:00
+								        odp_flow_key_from_flow(&odp_parms, &keybuf);
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    }
 								    atomic_read_relaxed(&enable_megaflows, &megaflow);
 								    ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub);
-												ovs-vswitchd: Avoid segfault for "netdev" datapath.

When a datapath of type "netdev" processes packets in a userspace
action, it may cause a segmentation fault. In
dp_execute_userspace_action(), we pass NULL as the "wc" argument to
dp_netdev_upcall(), but "wc" is used in the dp_netdev_upcall() call
tree. For example, dp_netdev_upcall() uses &wc->masks for debugging,
and flow_wildcards_init_for_packet() uses "wc" if we disable megaflow,
which is described in more detail below.

Segmentation fault in flow_wildcards_init_for_packet:

    #0  0x0000000000468fe8 flow_wildcards_init_for_packet lib/flow.c:1275
    #1  0x0000000000436c0b upcall_cb ofproto/ofproto-dpif-upcall.c:1231
    #2  0x000000000045bd96 dp_netdev_upcall lib/dpif-netdev.c:3857
    #3  0x0000000000461bf3 dp_execute_userspace_action lib/dpif-netdev.c:4388
    #4  dp_execute_cb lib/dpif-netdev.c:4521
    #5  0x0000000000486ae2 odp_execute_actions lib/odp-execute.c:538
    #6  0x00000000004607f9 dp_netdev_execute_actions lib/dpif-netdev.c:4627
    #7  packet_batch_per_flow_execute lib/dpif-netdev.c:3927
    #8  dp_netdev_input__ lib/dpif-netdev.c:4229
    #9  0x0000000000460ba8 dp_netdev_input lib/dpif-netdev.c:4238
    #10 dp_netdev_process_rxq_port lib/dpif-netdev.c:2873
    #11 0x000000000046126e dpif_netdev_run lib/dpif-netdev.c:3000
    #12 0x000000000042baf5 type_run ofproto/ofproto-dpif.c:504
    #13 0x00000000004192bf ofproto_type_run ofproto/ofproto.c:1687
    #14 0x0000000000409965 bridge_run__ vswitchd/bridge.c:2875
    #15 0x000000000040f145 bridge_run vswitchd/bridge.c:2938
    #16 0x00000000004062e5 main vswitchd/ovs-vswitchd.c:111

Signed-off-by: nickcooper-zhangtonghao <nic@opencloud.tech>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-12-07 10:04:04 -08:00
+								    if (megaflow && wc) {
-												odp-util: Pass down flow netlink attributes when translating masks.

Sometimes we need to look at flow fields to understand how to parse
an attribute. However, masks don't have this information - just the
mask on the field. We already use the translated flow structure for
this purpose but this isn't always enough since sometimes we actually
need the raw netlink information. Fortunately, that is also readily
available so this passes it down from the appropriate callers.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-06-19 13:54:13 -07:00
+								        odp_parms.key_buf = &keybuf;
-												odp-util: Convert flow serialization parameters to a struct.

Serializing between userspace flows and netlink attributes currently
requires several additional parameters besides the flows themselves.
This will continue to grow in the future as well. This converts
the function arguments to a parameters struct, which makes the code
easier to read and allows irrelevant arguments to be omitted.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2015-06-16 11:15:28 -07:00
+								        odp_flow_key_from_mask(&odp_parms, &maskbuf);
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    }
-												ofpbuf: Simplify ofpbuf API.

ofpbuf was complicated due to its wide usage across all
layers of OVS.  Now that we have introduced an independent dp_packet
which can be used for datapath packets, we can simplify ofpbuf.
The following patch removes the DPDK mbuf and the access API of ofpbuf
members.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-03-02 17:29:44 -08:00
+								    return ukey_create__(keybuf.data, keybuf.size, maskbuf.data, maskbuf.size,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As a side effect, the flow-dump output of the userspace datapath
can contain overlapping flows.  To reduce confusion, the dumps
from different pmd threads will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be applied to all
pmd threads that receive from that interface (except for flow-get,
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								                         true, upcall->ufid, upcall->pmd_id,
-												ofproto-dpif: Init ukey->dump_seq to zero

In the current implementation the dump_seq of a new datapath flow ukey
is set to seq_read(udpif->dump_seq). This implies that any revalidation
during the current dump_seq period (up to 500 ms) is skipped.

This can trigger incorrect behavior, for example when the creation of a
datapath flow triggers a PACKET_IN to the controller, in the course of which
the controller installs a new flow entry that should invalidate the
original datapath flow.

Initializing ukey->dump_seq to zero implies that the first dump of the
flow, be it for revalidation or dumping statistics, will always be
executed as zero is not a valid value of the ovs_seq.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-04 13:26:02 +02:00
+								                         &upcall->put_actions, upcall->reval_seq, 0,
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								                         upcall->have_recirc_ref ? upcall->recirc->id : 0,
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								                         &upcall->xout);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								}
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								static int
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								ukey_create_from_dpif_flow(const struct udpif *udpif,
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                           const struct dpif_flow *flow,
 								                           struct udpif_key **ukey)
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								{
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    struct dpif_flow full_flow;
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
+								    struct ofpbuf actions;
-												ofproto-dpif: Init ukey->dump_seq to zero

In the current implementation the dump_seq of a new datapath flow ukey
is set to seq_read(udpif->dump_seq). This implies that any revalidation
during the current dump_seq period (up to 500 ms) is skipped.

This can trigger incorrect behavior, for example when the creation of a
datapath flow triggers a PACKET_IN to the controller, in response to which
the controller installs a new flow entry that should invalidate the
original datapath flow.

Initializing ukey->dump_seq to zero implies that the first dump of the
flow, be it for revalidation or dumping statistics, will always be
executed as zero is not a valid value of the ovs_seq.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-04 13:26:02 +02:00
+								    uint64_t reval_seq;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    uint64_t stub[DPIF_FLOW_BUFSIZE / 8];
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    const struct nlattr *a;
 								    unsigned int left;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								    if (!flow->key_len || !flow->actions_len) {
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        struct ofpbuf buf;
 								        int err;
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        /* If the key or actions were not provided by the datapath, fetch the
 								         * full flow. */
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        ofpbuf_use_stack(&buf, &stub, sizeof stub);
-												ofproto-dpif-upcall: Pass key to dpif_flow_get().

Windows datapath folks have reported instances where OVS userspace will
pass down a flow_get request to the datapath using a UFID even though the
datapath has no support for UFIDs. Since commit e672ff9b4d22
("ofproto-dpif: Restore metadata and registers on recirculation."), if a
flow dump provides a flow that userspace isn't aware of, and the flow
dump doesn't provide actions for that flow, then userspace will attempt
a flow_get using just the UFID. This is because the ofproto-dpif layer
doesn't pass the key down to the dpif layer even if it's available.
Prior to the above commit, the codepath was only hit if the key was not
available, which would have implied UFID support. This assumption is now
broken: An empty set of actions could also trigger flow_get, and
datapaths without UFID support are free to pass up empty actions lists.

Pass down the flow key if available, and don't pass down the UFID if
unavailable to be more consistent with the usage of other dpif APIs
within this file.

Fixes: e672ff9b4d22 ("ofproto-dpif: Restore metadata and registers on recirculation.")
Reported-by: Sairam Venugopal <vsairam@vmware.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-05-10 15:42:01 -07:00
+								        err = dpif_flow_get(udpif->dpif, flow->key, flow->key_len,
 								                            flow->ufid_present ? &flow->ufid : NULL,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted on all
pmd threads that receive from that interface (except for flow-get,
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								                            flow->pmd_id, &buf, &full_flow);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        if (err) {
 								            return err;
 								        }
 								        flow = &full_flow;
 								    }
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
 								    /* Check the flow actions for recirculation action.  As recirculation
 								     * relies on OVS userspace internal state, we need to delete all old
-												upcall: Check for recirc_id in ukey_create_from_dpif_flow()

Filter out not only flows with recirculation actions, but also flows
with non-zero recirculation id in flow key when creating ukeys from
datapath flows, as such flows also depend on the recirculation
context, which has been lost after a restart.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>
											
										
										
											2015-11-04 15:47:36 -08:00
+								     * datapath flows with either a non-zero recirc_id in the key, or any
 								     * recirculation actions upon OVS restart. */
-												ofproto-dpif-upcall: Fix key attr iteration.

This call is operating on messages generated by the datapath. If a
datapath implementation sends improperly formatted netlink attributes,
then it's possible for a revalidator thread to end up trapped in an
infinite loop iterating across these attributes. Rather than using the
UNSAFE variation of this iterator, use the regular version.

Fixes: 994fcc5a15d3 ("upcall: Check for recirc_id in ukey_create_from_dpif_flow()")
Signed-off-by: Joe Stringer <joe@ovn.org>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-31 16:54:22 -07:00
+								    NL_ATTR_FOR_EACH (a, left, flow->key, flow->key_len) {
-												upcall: Check for recirc_id in ukey_create_from_dpif_flow()

Filter out not only flows with recirculation actions, but also flows
with non-zero recirculation id in flow key when creating ukeys from
datapath flows, as such flows also depend on the recirculation
context, which has been lost after a restart.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Joe Stringer <joestringer@nicira.com>
											
										
										
											2015-11-04 15:47:36 -08:00
+								        if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID
 								            && nl_attr_get_u32(a) != 0) {
 								            return EINVAL;
 								        }
 								    }
-												ofproto-dpif-upcall: Fix action attr iteration.

This call is operating on messages generated by the datapath. If a
datapath implementation sends improperly formatted netlink attributes,
then it's possible for a revalidator thread to end up trapped in an
infinite loop iterating across the actions attributes. Rather than using
the UNSAFE variation of this iterator, use the regular version.

Fixes: e672ff9b4d22 ("ofproto-dpif: Restore metadata and registers on recirculation.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-31 16:54:21 -07:00
+								    NL_ATTR_FOR_EACH (a, left, flow->actions, flow->actions_len) {
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) {
 								            return EINVAL;
 								        }
 								    }
-												revalidator: Revalidate ukeys created from flows.

If there is no active ukey for a particular datapath flow, and it is
dumped from the datapath, then the revalidator threads will assemble a
ukey based on the datapath flow. This will allow tracking of the stats
for proper attribution, and future validation of the flow.

However, until now when creating the ukey in this context, the ukey's
'reval_seq' has been set to the current udpif's reval_seq. This implies
that the flow has been validated against the current flow table.
However, this is not true - The flow appeared in the datapath without
any prior knowledge in this OVS instance so we should set up the
reval_seq of the ukey to ensure that the flow will be validated during
the current dump/revalidation cycle.

Refer also to revalidate_ukey().

Fixes: 23597df05226 ("upcall: Create ukeys in handler threads.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-05-01 12:58:06 -07:00
+								    reval_seq = seq_read(udpif->reval_seq) - 1; /* Ensure revalidation. */
-												ofproto: fix stack-buffer-overflow

Should use flow->actions not &flow->actions.

here is ASAN report:
=================================================================
==57189==ERROR: AddressSanitizer: stack-buffer-overflow on address 0xffff428fa0e8 at pc 0xffff7f61a520 bp 0xffff428f9420 sp 0xffff428f9498 READ of size 196 at 0xffff428fa0e8 thread T150 (revalidator22)
    #0 0xffff7f61a51f in __interceptor_memcpy (/lib64/libasan.so.4+0xa251f)
    #1 0xaaaad26a3b2b in ofpbuf_put lib/ofpbuf.c:426
    #2 0xaaaad26a30cb in ofpbuf_clone_data_with_headroom lib/ofpbuf.c:248
    #3 0xaaaad26a2e77 in ofpbuf_clone_with_headroom lib/ofpbuf.c:218
    #4 0xaaaad26a2dc3 in ofpbuf_clone lib/ofpbuf.c:208
    #5 0xaaaad23e3993 in ukey_set_actions ofproto/ofproto-dpif-upcall.c:1640
    #6 0xaaaad23e3f03 in ukey_create__ ofproto/ofproto-dpif-upcall.c:1696
    #7 0xaaaad23e553f in ukey_create_from_dpif_flow ofproto/ofproto-dpif-upcall.c:1806
    #8 0xaaaad23e65fb in ukey_acquire ofproto/ofproto-dpif-upcall.c:1984
    #9 0xaaaad23eb583 in revalidate ofproto/ofproto-dpif-upcall.c:2625
    #10 0xaaaad23dee5f in udpif_revalidator ofproto/ofproto-dpif-upcall.c:1076
    #11 0xaaaad26b84ef in ovsthread_wrapper lib/ovs-thread.c:708
    #12 0xffff7e74a8bb in start_thread (/lib64/libpthread.so.0+0x78bb)
    #13 0xffff7e0665cb in thread_start (/lib64/libc.so.6+0xd55cb)

Address 0xffff428fa0e8 is located in stack of thread T150 (revalidator22) at offset 328 in frame
    #0 0xaaaad23e4cab in ukey_create_from_dpif_flow ofproto/ofproto-dpif-upcall.c:1762

  This frame has 4 object(s):
    [32, 96) 'actions'
    [128, 192) 'buf'
    [224, 328) 'full_flow'
    [384, 2432) 'stub' <== Memory access at offset 328 partially underflows this variable
HINT: this may be a false positive if your program uses some custom stack unwind mechanism or swapcontext
      (longjmp and C++ exceptions *are* supported) Thread T150 (revalidator22) created by T0 here:
    #0 0xffff7f5b0f7f in __interceptor_pthread_create (/lib64/libasan.so.4+0x38f7f)
    #1 0xaaaad26b891f in ovs_thread_create lib/ovs-thread.c:792
    #2 0xaaaad23dc62f in udpif_start_threads ofproto/ofproto-dpif-upcall.c:639
    #3 0xaaaad23daf87 in ofproto_set_flow_table ofproto/ofproto-dpif-upcall.c:446
    #4 0xaaaad230ff7f in dpdk_evs_cfg_set vswitchd/bridge.c:1134
    #5 0xaaaad2310097 in bridge_reconfigure vswitchd/bridge.c:1148
    #6 0xaaaad23279d7 in bridge_run vswitchd/bridge.c:3944
    #7 0xaaaad23365a3 in main vswitchd/ovs-vswitchd.c:240
    #8 0xffff7dfb1adf in __libc_start_main (/lib64/libc.so.6+0x20adf)
    #9 0xaaaad230a3d3  (/usr/sbin/ovs-vswitchd-2.7.0-1.1.RC5.001.asan+0x26f3d3)

SUMMARY: AddressSanitizer: stack-buffer-overflow (/lib64/libasan.so.4+0xa251f) in __interceptor_memcpy Shadow bytes around the buggy address:
  0x200fe851f3c0: 00 00 00 00 f1 f1 f1 f1 f8 f2 f2 f2 00 00 00 00
  0x200fe851f3d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f3e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f3f0: 00 00 00 00 f1 f1 f1 f1 00 00 00 00 00 00 00 00
  0x200fe851f400: f2 f2 f2 f2 f8 f8 f8 f8 f8 f8 f8 f8 f2 f2 f2 f2
=>0x200fe851f410: 00 00 00 00 00 00 00 00 00 00 00 00 00[f2]f2 f2
  0x200fe851f420: f2 f2 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f430: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f440: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f450: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x200fe851f460: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==57189==ABORTING

Acked-by: Numan Siddique <numans@ovn.org>
Signed-off-by: Linhaifeng <haifeng.lin@huawei.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-29 06:13:35 +00:00
+								    ofpbuf_use_const(&actions, flow->actions, flow->actions_len);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    *ukey = ukey_create__(flow->key, flow->key_len,
 								                          flow->mask, flow->mask_len, flow->ufid_present,
-												ofproto-dpif: Init ukey->dump_seq to zero

In the current implementation the dump_seq of a new datapath flow ukey
is set to seq_read(udpif->dump_seq). This implies that any revalidation
during the current dump_seq period (up to 500 ms) is skipped.

This can trigger incorrect behavior, for example when the creation of a
datapath flow triggers a PACKET_IN to the controller, in response to which
the controller installs a new flow entry that should invalidate the
original datapath flow.

Initializing ukey->dump_seq to zero implies that the first dump of the
flow, be it for revalidation or dumping statistics, will always be
executed as zero is not a valid value of the ovs_seq.

Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-04-04 13:26:02 +02:00
+								                          &flow->ufid, flow->pmd_id, &actions,
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								                          reval_seq, flow->stats.used, 0, NULL);
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads receiving from that interface (except for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    return 0;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								}
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								static bool
 								try_ukey_replace(struct umap *umap, struct udpif_key *old_ukey,
 								                 struct udpif_key *new_ukey)
 								    OVS_REQUIRES(umap->mutex)
 								    OVS_TRY_LOCK(true, new_ukey->mutex)
 								{
 								    bool replaced = false;
 								    if (!ovs_mutex_trylock(&old_ukey->mutex)) {
 								        if (old_ukey->state == UKEY_EVICTED) {
 								            /* The flow was deleted during the current revalidator dump,
 								             * but its ukey won't be fully cleaned up until the sweep phase.
 								             * In the mean time, we are receiving upcalls for this traffic.
 								             * Expedite the (new) flow install by replacing the ukey. */
 								            ovs_mutex_lock(&new_ukey->mutex);
 								            cmap_replace(&umap->cmap, &old_ukey->cmap_node,
 								                         &new_ukey->cmap_node, new_ukey->hash);
-												ofproto-dpif-upcall: New ukey needs to take the old ukey's dump seq.

The userspace datapath manages all the megaflows by a cmap. The cmap
data structure will grow/shrink during the datapath processing and it
will re-position megaflows. This might result in two revalidator threads
processing the same megaflow during one dump stage.

Consider a situation where revalidator 1 processes a megaflow A and
decides to delete it from the datapath; in the meantime, this megaflow
A is also queued in the process batch of revalidator 2. Normally it's ok
for revalidators to process the same megaflow multiple times, as the
dump_seq shows it's already dumped and the stats will not be contributed
twice.

Assume that right after A is deleted, a PMD thread generates again
a new megaflow B which has the same match and action of A. The ukey
of megaflow B will replace the one of megaflow A. Now the ukey B is
new to the revalidator system and its dump seq is 0.

Now since the dump seq of ukey B is 0, when processing megaflow A,
revalidator 2 will not identify that this megaflow A has already been
dumped by revalidator 1 and will contribute the old megaflow A's stats
again. This results in inconsistent stats between ukeys and megaflows.

To fix this, the newly generated ukey B should take the dump_seq
of the replaced ukey A to avoid the same megaflow being revalidated
twice in one dump stage.

We observe in the production environment, the OpenFlow rules' stats
sometimes are amplified compared to the actual value.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-11-27 07:28:55 +00:00
+								            new_ukey->dump_seq = old_ukey->dump_seq;
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								            ovsrcu_postpone(ukey_delete__, old_ukey);
 								            transition_ukey(old_ukey, UKEY_DELETED);
 								            transition_ukey(new_ukey, UKEY_VISIBLE);
 								            replaced = true;
-												ofproto-dpif-upcall: Fix ukey installation failure logs and counters.

ukey_install() returns boolean signaling if the ukey was installed
or not.  Installation may fail for a few reasons:

 1. Conflicting ukey.
 2. Mutex contention while trying to replace existing ukey.
 3. The same ukey already exists and is active.

Only the first case here signals an actual problem.  Third one is
a little odd for userspace datapath, but harmless.  Second is the
most common one that can easily happen during normal operation
since other threads like revalidators may be currently working on
this ukey preventing an immediate access.

Since only the first case is actually worth logging and it already
has its own log message, removing the 'upcall installation fails'
warning from the upcall_cb().  This should fix most of the random
failures of userspace system tests in CI.

While at it, also fixing coverage counters.  Mutex contention was
mistakenly counted as a duplicate upcall.  ukey contention for
revalidators was counted only in one of two places.

New counter added for the ukey contention on replace.  We should
not re-use existing upcall_ukey_contention counter for this, since
it may lead to double counting.

Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Fixes: 9cec8274ed9a ("ofproto-dpif-upcall: Add VLOG_WARN_RL logs for upcall_cb() error.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-04 14:09:37 +02:00
+								            COVERAGE_INC(upcall_ukey_replace);
 								        } else {
 								            COVERAGE_INC(handler_duplicate_upcall);
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								        }
 								        ovs_mutex_unlock(&old_ukey->mutex);
 								    } else {
-												ofproto-dpif-upcall: Fix ukey installation failure logs and counters.

ukey_install() returns boolean signaling if the ukey was installed
or not.  Installation may fail for a few reasons:

 1. Conflicting ukey.
 2. Mutex contention while trying to replace existing ukey.
 3. The same ukey already exists and is active.

Only the first case here signals an actual problem.  Third one is
a little odd for userspace datapath, but harmless.  Second is the
most common one that can easily happen during normal operation
since other threads like revalidators may be currently working on
this ukey preventing an immediate access.

Since only the first case is actually worth logging and it already
has its own log message, removing the 'upcall installation fails'
warning from the upcall_cb().  This should fix most of the random
failures of userspace system tests in CI.

While at it, also fixing coverage counters.  Mutex contention was
mistakenly counted as a duplicate upcall.  ukey contention for
revalidators was counted only in one of two places.

New counter added for the ukey contention on replace.  We should
not re-use existing upcall_ukey_contention counter for this, since
it may lead to double counting.

Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Fixes: 9cec8274ed9a ("ofproto-dpif-upcall: Add VLOG_WARN_RL logs for upcall_cb() error.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-04 14:09:37 +02:00
+								        COVERAGE_INC(ukey_replace_contention);
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								    }
-												ofproto-dpif-upcall: Fix ukey installation failure logs and counters.

ukey_install() returns boolean signaling if the ukey was installed
or not.  Installation may fail for a few reasons:

 1. Conflicting ukey.
 2. Mutex contention while trying to replace existing ukey.
 3. The same ukey already exists and is active.

Only the first case here signals an actual problem.  Third one is
a little odd for userspace datapath, but harmless.  Second is the
most common one that can easily happen during normal operation
since other threads like revalidators may be currently working on
this ukey preventing an immediate access.

Since only the first case is actually worth logging and it already
has its own log message, removing the 'upcall installation fails'
warning from the upcall_cb().  This should fix most of the random
failures of userspace system tests in CI.

While at it, also fixing coverage counters.  Mutex contention was
mistakenly counted as a duplicate upcall.  ukey contention for
revalidators was counted only in one of two places.

New counter added for the ukey contention on replace.  We should
not re-use existing upcall_ukey_contention counter for this, since
it may lead to double counting.

Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Fixes: 9cec8274ed9a ("ofproto-dpif-upcall: Add VLOG_WARN_RL logs for upcall_cb() error.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-04 14:09:37 +02:00
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								    return replaced;
 								}
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								/* Attempts to insert a ukey into the shared ukey maps.
 								 *
 								 * On success, returns true, installs the ukey and returns it in a locked
 								 * state. Otherwise, returns false. */
 								static bool
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								ukey_install__(struct udpif *udpif, struct udpif_key *new_ukey)
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    OVS_TRY_LOCK(true, new_ukey->mutex)
 								{
 								    struct umap *umap;
 								    struct udpif_key *old_ukey;
 								    uint32_t idx;
 								    bool locked = false;
 								    idx = new_ukey->hash % N_UMAPS;
 								    umap = &udpif->ukeys[idx];
 								    ovs_mutex_lock(&umap->mutex);
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode several pmd threads may process one port, but
different queues. Flow may not depend on queue. It's true at least for
vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    old_ukey = ukey_lookup(udpif, &new_ukey->ufid, new_ukey->pmd_id);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    if (old_ukey) {
 								        /* Uncommon case: A ukey is already installed with the same UFID. */
 								        if (old_ukey->key_len == new_ukey->key_len
 								            && !memcmp(old_ukey->key, new_ukey->key, new_ukey->key_len)) {
-												upcall: Replace ukeys for deleted flows.

If a revalidator dumps/revalidates a flow during the 'dump' phase,
resulting in the deletion of the flow, then the ukey state moves into
UKEY_EVICTED, and the ukey is kept around until the 'sweep' phase. The
ukey is kept around to ensure that cases like duplicated dumps from the
datapaths do not result in multiple attribution of the same stats.

However, if an upcall for this flow comes for a handler between the
revalidator 'dump' and 'sweep' phases, the handler will lookup the ukey
and find that the ukey exists, then skip installing a new flow entirely.
As a result, for this period all traffic for the flow is slowpathed.
If there is a lot of traffic hitting this flow, then it will all be
handled in userspace until the 'sweep' phase. Eventually the
revalidators will reach the sweep phase and delete the ukey, and
subsequently the handlers should install a new flow.

To reduce the slowpathing of this traffic during flow table transitions,
allow the handler to identify this case during miss upcall handling and
replace the existing ukey with a new ukey. The handler will then be able
to install a flow for this traffic, allowing the traffic flow to return
to the fastpath.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:05 -07:00
+								            locked = try_ukey_replace(umap, old_ukey, new_ukey);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        } else {
 								            struct ds ds = DS_EMPTY_INITIALIZER;
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								            odp_format_ufid(&old_ukey->ufid, &ds);
 								            ds_put_cstr(&ds, " ");
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								            odp_flow_key_format(old_ukey->key, old_ukey->key_len, &ds);
 								            ds_put_cstr(&ds, "\n");
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								            odp_format_ufid(&new_ukey->ufid, &ds);
 								            ds_put_cstr(&ds, " ");
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								            odp_flow_key_format(new_ukey->key, new_ukey->key_len, &ds);
 								            VLOG_WARN_RL(&rl, "Conflicting ukey for flows:\n%s", ds_cstr(&ds));
 								            ds_destroy(&ds);
 								        }
 								    } else {
 								        ovs_mutex_lock(&new_ukey->mutex);
 								        cmap_insert(&umap->cmap, &new_ukey->cmap_node, new_ukey->hash);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        transition_ukey(new_ukey, UKEY_VISIBLE);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        locked = true;
 								    }
 								    ovs_mutex_unlock(&umap->mutex);
 								    return locked;
 								}
 								static void
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								transition_ukey_at(struct udpif_key *ukey, enum ukey_state dst,
 								                   const char *where)
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    OVS_REQUIRES(ukey->mutex)
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								{
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								    if (dst < ukey->state) {
 								        VLOG_ABORT("Invalid ukey transition %d->%d (last transitioned from "
 								                   "thread %u at %s)", ukey->state, dst, ukey->state_thread,
 								                   ukey->state_where);
 								    }
-												revalidator: Complain for more ukey transitions.

For most ukey transition states, only one thread should be responsible
for transitioning the ukey into the new state. If another thread
attempts to transition the ukey into the same state (for instance,
evicting the datapath flow or deleting the ukey), then it is likely
performing additional work which should only happen once. Log all cases
of ukey transition into the current state, except for UKEY_OPERATIONAL
-> UKEY_OPERATIONAL which regularly occurs when revalidating ukeys.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-01-10 15:54:03 -08:00
+								    if (ukey->state == dst && dst == UKEY_OPERATIONAL) {
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        return;
 								    }
 								    /* Valid state transitions:
 								     * UKEY_CREATED -> UKEY_VISIBLE
 								     *  Ukey is now visible in the umap.
 								     * UKEY_VISIBLE -> UKEY_OPERATIONAL
 								     *  A handler has installed the flow, and the flow is in the datapath.
 								     * UKEY_VISIBLE -> UKEY_EVICTING
 								     *  A handler installs the flow, then revalidator sweeps the ukey before
 								     *  the flow is dumped. Most likely the flow was installed; start trying
 								     *  to delete it.
 								     * UKEY_VISIBLE -> UKEY_EVICTED
 								     *  A handler attempts to install the flow, but the datapath rejects it.
 								     *  Consider that the datapath has already destroyed it.
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

Note that we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here because, if we did, the sweep would
remove only the ukey and leave the dp flow alive. A later dump would then
retrieve the dp flow and might even recover it, which would count the
stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								     * UKEY_OPERATIONAL -> UKEY_INCONSISTENT
 								     *  A revalidator modifies the flow with error returns.
 								     * UKEY_INCONSISTENT -> UKEY_EVICTING
 								     *  A revalidator decides to evict the datapath flow.
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								     * UKEY_OPERATIONAL -> UKEY_EVICTING
 								     *  A revalidator decides to evict the datapath flow.
 								     * UKEY_EVICTING    -> UKEY_EVICTED
 								     *  A revalidator has evicted the datapath flow.
 								     * UKEY_EVICTED     -> UKEY_DELETED
 								     *  A revalidator has removed the ukey from the umap and is deleting it.
 								     */
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

Note that we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here because, if we did, the sweep would
remove only the ukey and leave the dp flow alive. A later dump would then
retrieve the dp flow and might even recover it, which would count the
stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								    if (ukey->state == dst - 1 ||
 								       (ukey->state == UKEY_VISIBLE && dst < UKEY_DELETED) ||
 								       (ukey->state == UKEY_OPERATIONAL && dst == UKEY_EVICTING)) {
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        ukey->state = dst;
 								    } else {
 								        struct ds ds = DS_EMPTY_INITIALIZER;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        odp_format_ufid(&ukey->ufid, &ds);
 								        VLOG_WARN_RL(&rl, "Invalid state transition for ukey %s: %d -> %d",
 								                     ds_cstr(&ds), ukey->state, dst);
 								        ds_destroy(&ds);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    }
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								    ukey->state_thread = ovsthread_id_self();
 								    ukey->state_where = where;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								}
 								static bool
 								ukey_install(struct udpif *udpif, struct udpif_key *ukey)
 								{
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    bool installed;
 								    installed = ukey_install__(udpif, ukey);
 								    if (installed) {
 								        ovs_mutex_unlock(&ukey->mutex);
 								    }
 								    return installed;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								}
 								/* Searches for a ukey in 'udpif->ukeys' that matches 'flow' and attempts to
 								 * lock the ukey. If the ukey does not exist, create it.
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								 *
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								 * Returns 0 on success, setting *result to the matching ukey and returning it
 								 * in a locked state. Otherwise, returns an errno and clears *result. EBUSY
 								 * indicates that another thread is handling this flow. Other errors indicate
 								 * an unexpected condition creating a new ukey.
 								 *
 								 * *error is an output parameter provided to appease the threadsafety analyser,
 								 * and its value matches the return value. */
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								static int
 								ukey_acquire(struct udpif *udpif, const struct dpif_flow *flow,
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								             struct udpif_key **result, int *error)
 								    OVS_TRY_LOCK(0, (*result)->mutex)
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								{
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
+								    struct udpif_key *ukey;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    int retval;
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
-												dpif: Allow adding ukeys for same flow by different pmds.

In multiqueue mode, several pmd threads may process different queues of
the same port. A flow may not depend on the queue; this is true at least
for vhost-user ports.

When multiple pmd threads attempt to process upcalls for a particular
flow key, only the first will succeed. Any subsequent threads will
receive error = ENOSPC when attempting to insert a new udpif_key into
the umaps. This causes the latter threads to never insert a flow into
the datapath to handle the traffic, and as a result they will
consistently execute those flows through the slow path.

Fix that by mixing pmd_id with the bits from the ufid for ukey->hash
calculation. So, for a given flow key/UFID, each pmd thread will create
an independent udpif_key.

This also opens the possibility to reassign queues among pmd threads
without restarting them and deleting the megaflow cache.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-02-03 14:31:43 +03:00
+								    ukey = ukey_lookup(udpif, &flow->ufid, flow->pmd_id);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    if (ukey) {
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        retval = ovs_mutex_trylock(&ukey->mutex);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    } else {
 								        /* Usually we try to avoid installing flows from revalidator threads,
 								         * because locking on a umap may cause handler threads to block.
 								         * However there are certain cases, like when ovs-vswitchd is
 								         * restarted, where it is desirable to handle flows that exist in the
 								         * datapath gracefully (ie, don't just clear the datapath). */
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        bool install;
 								        retval = ukey_create_from_dpif_flow(udpif, flow, &ukey);
 								        if (retval) {
 								            goto done;
 								        }
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        install = ukey_install__(udpif, ukey);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        if (install) {
 								            retval = 0;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        } else {
 								            ukey_delete__(ukey);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            retval = EBUSY;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    }
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								done:
 								    *error = retval;
 								    if (retval) {
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
+								        *result = NULL;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    } else {
 								        *result = ukey;
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
+								    }
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    return retval;
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								ukey_delete__(struct udpif_key *ukey)
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    if (ukey) {
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								        if (ukey->key_recirc_id) {
 								            recirc_free_id(ukey->key_recirc_id);
-												ofproto-dpif: Restore metadata and registers on recirculation.

xlate_actions() now considers an optional recirculation context (via
'xin') and restores OpenFlow pipeline metadata (registers, 'metadata',
etc.) based on it.  The recirculation context may contain an action
set and stack to be restored and further actions to be executed upon
recirculation.  It also contains a table_id number to be used for rule
lookup in cases where no post-recirculation actions are used.

The translation context internal metadata is restored using a new
internal action: UNROLL_XLATE action stores the translation context
data visible to OpenFlow controllers via PACKET_IN messages.  This
includes the current table number and the current rule cookie.
UNROLL_XLATE actions are inserted only when the remaining actions may
generate PACKET_IN messages.

These changes allow the post-MPLS recirculation to properly continue
with the pipeline metadata that existed at the time of recirculation.

The internal table is still consulted for bonds.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2015-03-26 11:18:16 -07:00
+								        }
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								        recirc_refs_unref(&ukey->recircs);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        xlate_cache_delete(ukey->xcache);
-												ofproto-dpif-upcall: Make ukey actions modifiable with RCU.

Future patches will need to modify ukey actions in some instances.
This patch makes this possible by protecting them with RCU.  It also
adds thread safety checks to enforce the new protection mechanism.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-12 14:50:54 -07:00
+								        ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions));
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								        ovs_mutex_destroy(&ukey->mutex);
 								        free(ukey);
 								    }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								static void
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								ukey_delete(struct umap *umap, struct udpif_key *ukey)
 								    OVS_REQUIRES(umap->mutex)
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								{
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    ovs_mutex_lock(&ukey->mutex);
-												revalidator: Prevent double-delete of ukey.

revalidator_sweep__() splits checking for whether to delete a ukey from
the actual deletion to prevent taking the umap lock for too long.
However it uses information gathered from the first critical section to
decide to call ukey_delete() - ie, the second critical section.

Since 67f08985d769 ("upcall: Replace ukeys for deleted flows."), it is
possible for a handler thread to receive an upcall for the same flow and
to replace the ukey which is being deleted with a new one, in between
these critical sections. This will remove the ukey from the cmap,
rcu-defer its deletion, and update the ukey state.

If this occurs in between the critical sections of revalidator cleanup
of the flow, then the revalidator will subsequently call ukey_delete()
to delete the original ukey, which was already deleted by the handler
thread. This leads to a segfault in cmap_replace__().

Guard against this by checking the ukey state in ukey_delete() while
holding the ukey lock.

Backtrace:
    Program terminated with signal 11, Segmentation fault.
    #0  0x00007fe969b13da3 in cmap_replace__ ()
    #1  0x00007fe969b14491 in cmap_replace ()
    #2  0x00007fe969aee9ff in ukey_delete ()
    #3  0x00007fe969aefd42 in revalidator_sweep__ ()
    #4  0x00007fe969af1bad in udpif_revalidator ()
    #5  0x00007fe969b8b2a6 in ovsthread_wrapper ()
    #6  0x00007fe968e07dc5 in start_thread () from /lib64/libpthread.so.0
    #7  0x00007fe96862c73d in clone () from /lib64/libc.so.6

Fixes: 54ebeff4c03d ("upcall: Track ukey states.")
Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Reported-by: Numan Siddique <nusiddiq@redhat.com>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-01-10 15:54:02 -08:00
+								    if (ukey->state < UKEY_DELETED) {
 								        cmap_remove(&umap->cmap, &ukey->cmap_node, ukey->hash);
 								        ovsrcu_postpone(ukey_delete__, ukey);
 								        transition_ukey(ukey, UKEY_DELETED);
 								    }
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    ovs_mutex_unlock(&ukey->mutex);
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								}
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
+								static bool
-												ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps.

Depending on the driver implementation, it can take from 0.2 seconds
up to 2 seconds before offloaded flow statistics are updated. This is
true for both TC and rte_flow-based offloading. This is causing a
problem with min-revalidate-pps, as old statistic values are used
during this period.

This fix will wait for at least 2 seconds, by default, before assuming no
packets were received during this period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-08 13:55:44 +01:00
+								should_revalidate(const struct udpif *udpif, const struct udpif_key *ukey,
 								                  uint64_t packets)
 								    OVS_REQUIRES(ukey->mutex)
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
+								{
 								    long long int metric, now, duration;
-												ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps.

Depending on the driver implementation, it can take from 0.2 seconds
up to 2 seconds before offloaded flow statistics are updated. This is
true for both TC and rte_flow-based offloading. This is causing a
problem with min-revalidate-pps, as old statistic values are used
during this period.

This fix will wait for at least 2 seconds, by default, before assuming no
packets were received during this period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-08 13:55:44 +01:00
+								    long long int used = ukey->stats.used;
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
-												revalidator: Allow min-revalidator-pps to be 0.

Today the minimum value for this setting is 1. This patch allows it to
be 0, meaning not checking pps at all, and always do revalidation.

This is particularly useful for environments where some of the
applications with long-lived connections may have very low traffic for
certain periods but periodically have high-rate bursts. It is desirable
to keep the datapath flows instead of periodically deleting them to
avoid bursts of packet misses to userspace.

When setting to 0, there may be more datapath flows to be revalidated,
resulting in higher CPU cost of revalidator threads. This is the
downside but in certain cases this is still more desirable than packet
misses to user space.

Signed-off-by: Han Zhou <hzhou@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-16 19:01:29 -08:00
+								    if (!ofproto_min_revalidate_pps) {
 								        return true;
 								    }
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								    if (!used) {
 								        /* Always revalidate the first time a flow is dumped. */
 								        return true;
 								    }
-												upcall: Change should_revalidate to use max-revalidator value

Revalidate if dump duration was longer than half of max-revalidator
timeout, instead of hardcoded 200msec value.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-07-21 11:34:22 +03:00
+								    if (udpif->dump_duration < ofproto_max_revalidator / 2) {
-												revalidator: Improve optimization to skip revalidation.

The should_revalidate() optimisation introduced with commit 698ffe3623
(revalidator: Only revalidate high-throughput flows.) was a little
aggressive, occasionally deleting flows even when OVS is quite capable
of performing full revalidation.

This commit modifies the logic to:
* Firstly, check if we are likely to handle full revalidation, and
  attempt that instead.
* Secondly, fall back to the existing flow throughput estimations to
  determine whether to revalidate the flow or just delete it.

VMware-BZ: #1271926

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-07-02 07:41:33 +00:00
+								        /* We are likely to handle full revalidation for the flows. */
 								        return true;
 								    }
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
+								    /* Calculate the mean time between seeing these packets. If this
 								     * exceeds the threshold, then delete the flow rather than performing
 								     * costly revalidation for flows that aren't being hit frequently.
 								     *
 								     * This is targeted at situations where the dump_duration is high (~1s),
 								     * and revalidation is triggered by a call to udpif_revalidate(). In
 								     * these situations, revalidation of all flows causes fluctuations in the
 								     * flow_limit due to the interaction with the dump_duration and max_idle.
 								     * This tends to result in deletion of low-throughput flows anyway, so
 								     * skip the revalidation and just delete those flows. */
 								    packets = MAX(packets, 1);
 								    now = MAX(used, time_msec());
 								    duration = now - used;
 								    metric = duration / packets;
-												ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps.

Depending on the driver implementation, it can take from 0.2 seconds
up to 2 seconds before offloaded flow statistics are updated. This is
true for both TC and rte_flow-based offloading. This is causing a
problem with min-revalidate-pps, as old statistic values are used
during this period.

This fix will wait for at least 2 seconds, by default, before assuming no
packets were received during this period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-08 13:55:44 +01:00
+								    if (metric < 1000 / ofproto_min_revalidate_pps ||
 								        (ukey->offloaded && duration < ofproto_offloaded_stats_delay)) {
 								        /* The flow is receiving more than min-revalidate-pps, so keep it.
 								         * Or it's a hardware offloaded flow that might take up to X seconds
 								         * to update its statistics. Until we are sure the statistics had a
 								         * chance to be updated, also keep it. */
-												revalidator: Improve optimization to skip revalidation.

The should_revalidate() optimisation introduced with commit 698ffe3623
(revalidator: Only revalidate high-throughput flows.) was a little
aggressive, occasionally deleting flows even when OVS is quite capable
of performing full revalidation.

This commit modifies the logic to:
* Firstly, check if we are likely to handle full revalidation, and
  attempt that instead.
* Secondly, fall back to the existing flow throughput estimations to
  determine whether to revalidate the flow or just delete it.

VMware-BZ: #1271926

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-07-02 07:41:33 +00:00
+								        return true;
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
+								    }
-												revalidator: Improve optimization to skip revalidation.

The should_revalidate() optimisation introduced with commit 698ffe3623
(revalidator: Only revalidate high-throughput flows.) was a little
aggressive, occasionally deleting flows even when OVS is quite capable
of performing full revalidation.

This commit modifies the logic to:
* Firstly, check if we are likely to handle full revalidation, and
  attempt that instead.
* Secondly, fall back to the existing flow throughput estimations to
  determine whether to revalidate the flow or just delete it.

VMware-BZ: #1271926

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-07-02 07:41:33 +00:00
+								    return false;
-												revalidator: Only revalidate high-throughput flows.

Previously we would revalidate all flows if the "need_revalidate" flag
was raised. This patch modifies the logic to delete low throughput flows
rather than revalidate them. High-throughput flows are unaffected by
this change. This patch identifies the flows based on the mean time
between packets since the last dump.

This change is primarily targeted at situations where:
* Flow dump duration is high (~1 second)
* Revalidation is triggered. (eg, by bridge reconfiguration or learning)

After the need_revalidate flag is set, next time a new flow dump session
starts, revalidators will begin revalidating the flows. This full
revalidation is more expensive, which significantly increases the flow
dump duration. At the end of this dump session, the datapath flow
management algorithms kick in for the next dump:

* If flow dump duration becomes too long, the flow limit is decreased.
* The number of flows in the datapath then exceeds the flow_limit.
* As the flow_limit is exceeded, max_idle is temporarily set to 100ms.
* Revalidators delete all flows that haven't seen traffic recently.

The effect of this is that many low-throughput flows are deleted after
revalidation, even if they are valid. The revalidation is unnecessary
for flows that would be deleted anyway, so this patch skips the
revalidation step for those flows.

Note that this patch will only perform this optimization if the flow has
already been dumped at least once, and only if the time since the last
dump is sufficiently long. This gives the flow a chance to become
high-throughput.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Determine "high-throughput" by packets rather than bytes.
    Calculate the mean time between packets for comparison, rather than
      comparing the number of packets since the last dump.
RFC: First post.

											
										
										
											2014-03-04 09:36:37 -08:00
+								}
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								struct reval_context {
 								    /* Bundle of translation inputs/outputs handed to xlate_key() and
 								     * xlate_ukey() when a datapath flow key is re-translated. */
 								    /* Optional output parameters */
 								    struct flow_wildcards *wc;      /* Forwarded to xlate_in_init(). */
 								    struct ofpbuf *odp_actions;     /* Forwarded to xlate_in_init(). */
 								    struct netflow **netflow;       /* Forwarded to xlate_lookup(). */
 								    struct xlate_cache *xcache;     /* Installed as 'xin.xcache'. */
 								    /* Required output parameters */
 								    struct xlate_out xout;          /* Filled in by xlate_actions(); the
 								                                     * caller uninitializes it on
 								                                     * success. */
 								    struct flow flow;               /* Parsed from the datapath key by
 								                                     * odp_flow_key_to_flow(). */
 								};
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								/* Translates 'key' into a flow, populating 'ctx' as it goes along.
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								 *
 								 * Returns 0 on success, otherwise a positive errno value.
 								 *
 								 * The caller is responsible for uninitializing ctx->xout on success.
 								 */
 								static int
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								xlate_key(struct udpif *udpif, const struct nlattr *key, unsigned int len,
 								          const struct dpif_flow_stats *push, struct reval_context *ctx)
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								{
 								    struct ofproto_dpif *ofproto;
 								    ofp_port_t ofp_in_port;
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								    enum odp_key_fitness fitness;
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    struct xlate_in xin;
 								    int error;
-												odp-util: Improve log messages and error reporting for Netlink parsing.

As a side effect, this also reduces a lot of log messages' severities from
ERR to WARN.  They just didn't seem like messages that in general reported
anything that would prevent functioning.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-12-14 18:16:55 -08:00
+								    fitness = odp_flow_key_to_flow(key, len, &ctx->flow, NULL);
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								    if (fitness == ODP_FIT_ERROR) {
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								        return EINVAL;
 								    }
 								    error = xlate_lookup(udpif->backer, &ctx->flow, &ofproto, NULL, NULL,
-												ofproto-dpif-upcall: Print more data on unassociated datapath ports.

When OVS fails to find an OpenFlow port for a packet received
from the upcall it just prints the warning like this:

  |INFO|received packet on unassociated datapath port N

However, during the flow translation more information is available
as if the recirculation id wasn't found or it was a packet from
unknown tunnel port.  Printing that information might be useful
to understand the origin of the problem.

Port translation functions already support extended error strings,
we just need to pass a variable where to store them.

With the change the output may be:

  |INFO|received packet on unassociated datapath port N
        (no OpenFlow port for datapath port N)
or
  |INFO|received packet on unassociated datapath port N
        (no OpenFlow tunnel port for this packet)
or
  |INFO|received packet on unassociated datapath port N
        (no recirculation data for recirc_id M)

Unfortunately, there is no good way to trigger this code from
current unit tests.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-01 17:42:49 +02:00
+								                         ctx->netflow, &ofp_in_port, NULL);
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    if (error) {
 								        return error;
 								    }
 								    xlate_in_init(&xin, ofproto, ofproto_dpif_get_tables_version(ofproto),
 								                  &ctx->flow, ofp_in_port, NULL, push->tcp_flags,
 								                  NULL, ctx->wc, ctx->odp_actions);
 								    if (push->n_packets) {
 								        xin.resubmit_stats = push;
 								        xin.allow_side_effects = true;
 								    }
 								    xin.xcache = ctx->xcache;
 								    xlate_actions(&xin, &ctx->xout);
-												ofproto-dpif-upcall: Slow path flows that datapath can't fully match.

In the OVS architecture, when a datapath doesn't have a match for a packet,
it sends the packet and the flow that it extracted from it to userspace.
Userspace then examines the packet and the flow and compares them.
Commonly, the flow is the same as what userspace expects, given the packet,
but there are two other possibilities:

    - The flow lacks one or more fields that userspace expects to be there,
      that is, the datapath doesn't understand or parse them but userspace
      does.  This is, for example, what would happen if current OVS
      userspace, which understands and extracts TCP flags, were to be
      paired with an older OVS kernel module, which does not.  Internally
      OVS uses the name ODP_FIT_TOO_LITTLE for this situation.

    - The flow includes fields that userspace does not know about, that is,
      the datapath understands and parses them but userspace does not.
      This is, for example, what would happen if an old OVS userspace that
      does not understand or extract TCP flags, were to be paired with a
      recent OVS kernel module that does.  Internally, OVS uses the name
      ODP_FIT_TOO_MUCH for this situation.

The latter is not a big deal and OVS doesn't have to do much to cope with
it.

The former is more of a problem.  When the datapath can't match on all the
fields that OVS supports, it means that OVS can't safely install a flow at
all, other than one that directs packets to the slow path.  Otherwise, if
OVS did install a flow, it could match a packet that does not match the
flow that OVS intended to match and could cause the wrong behavior.

Somehow, this nuance was lost a long time ago.  From about 2013 until today,
it seems that OVS has ignored ODP_FIT_TOO_LITTLE.  Instead, it happily
installs a flow regardless of whether the datapath can actually fully match
it.  I imagine that this is rarely a problem because most of the time
the datapath and userspace are well matched, but it is still an important
problem to fix.  This commit fixes it, by forcing flows into the slow path
when the datapath cannot match specifically enough.

CC: Ethan Jackson <ejj@eecs.berkeley.edu>
Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Reported-by: Huanle Han <hanxueluo@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2018-January/343665.html
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-01-24 11:40:19 -08:00
+								    if (fitness == ODP_FIT_TOO_LITTLE) {
 								        ctx->xout.slow |= SLOW_MATCH;
 								    }
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
 								    return 0;
 								}
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								static int
 								xlate_ukey(struct udpif *udpif, const struct udpif_key *ukey,
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								           uint16_t tcp_flags, struct reval_context *ctx)
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								{
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								    struct dpif_flow_stats push = {
 								        .tcp_flags = tcp_flags,
 								    };
 								    return xlate_key(udpif, ukey->key, ukey->key_len, &push, ctx);
 								}
 								/* Builds a fresh translation cache for 'ukey' by translating its key.
 								 *
 								 * 'ukey' must not already have an xcache (asserted below).  A new cache
 								 * from xlate_cache_new() is stored in 'ukey->xcache' and passed to
 								 * xlate_ukey() together with 'tcp_flags'.  No datapath actions, netflow
 								 * or wildcards are requested: those context members are all NULL.
 								 *
 								 * Returns 0 on success, otherwise the nonzero error from xlate_ukey().
 								 * On error the new cache is left attached to 'ukey'.
 								 *
 								 * Annotated OVS_REQUIRES(ukey->mutex). */
 								static int
 								populate_xcache(struct udpif *udpif, struct udpif_key *ukey,
 								                uint16_t tcp_flags)
 								    OVS_REQUIRES(ukey->mutex)
 								{
 								    /* Only the xcache output is wanted from this translation. */
 								    struct reval_context ctx = {
 								        .odp_actions = NULL,
 								        .netflow = NULL,
 								        .wc = NULL,
 								    };
 								    int error;
 								    ovs_assert(!ukey->xcache);
 								    /* The ukey and the translation context share the same new cache. */
 								    ukey->xcache = ctx.xcache = xlate_cache_new();
 								    error = xlate_ukey(udpif, ukey, tcp_flags, &ctx);
 								    if (error) {
 								        /* Return early; 'ctx.xout' is not uninitialized on this path. */
 								        return error;
 								    }
 								    /* Translation succeeded: release the xlate_out, keep the cache. */
 								    xlate_out_uninit(&ctx.xout);
 								    return 0;
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								}
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								static enum reval_result
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
+								revalidate_ukey__(struct udpif *udpif, const struct udpif_key *ukey,
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								                  uint16_t tcp_flags, struct ofpbuf *odp_actions,
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                  struct recirc_refs *recircs, struct xlate_cache *xcache,
 								                  enum flow_del_reason *del_reason)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    struct xlate_out *xoutp;
-												ofproto-dpif-upcall: Expire netflow flow when revalidate_ukey failed

This fixes missing netflow flows in
"ofproto-dpif - NetFlow flow expiration" tests.

Acked-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-04-01 21:21:45 +09:00
+								    struct netflow *netflow;
-												ofproto-dpif-upcall: Use flow_wildcards_has_extra().

Update the comment in ukey_revalidate() to reflect the fact that the
mask in ukey is not the datapath mask, but the originally translated
flow wildcards.

Use flow_wildcards_has_extra() instead of open coding equivalent (but
different) functionality.  The old form and the code in
flow_wildcards_has_extra() ((dp | wc != dp) and (dp & wc != wc),
respectively) give the same result:

dp   wc    (dp | wc != dp)        (dp & wc != wc)
-------------------------------------------------------
0    0      (0 | 0 != 0) (false)   (0 & 0 != 0) (false)
0    1      (0 | 1 != 0) (true)    (0 & 1 != 1) (true)
1    0      (1 | 0 != 1) (false)   (1 & 0 != 0) (false)
1    1      (1 | 1 != 1) (false)   (1 & 1 != 1) (false)

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-09-29 14:21:33 -07:00
+								    struct flow_wildcards dp_mask, wc;
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    enum reval_result result;
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    struct reval_context ctx = {
 								        .odp_actions = odp_actions,
 								        .netflow = &netflow,
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
+								        .xcache = xcache,
 								        .wc = &wc,
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    };
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												utilities: Add revalidator measurement script and needed USDT probes.

This patch adds a Python script that can be used to analyze the
revalidator runs by providing statistics (including some real time
graphs).

The USDT events can also be captured to a file and used for
later offline analysis.

The following blog explains the Open vSwitch revalidator
implementation and how this tool can help you understand what is
happening in your system.

https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-23 12:03:29 +01:00
+								    OVS_USDT_PROBE(revalidate_ukey__, entry, udpif, ukey, tcp_flags,
 								                   odp_actions, recircs, xcache);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    result = UKEY_DELETE;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    xoutp = NULL;
-												ofproto-dpif-upcall: Expire netflow flow when revalidate_ukey failed

This fixes missing netflow flows in
"ofproto-dpif - NetFlow flow expiration" tests.

Acked-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-04-01 21:21:45 +09:00
+								    netflow = NULL;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								    if (xlate_ukey(udpif, ukey, tcp_flags, &ctx)) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								        *del_reason = FDR_XLATION_ERROR;
-												ofproto: Reorganize in preparation for direct dpdk upcalls.

This patch reorganizes ofproto-dpif in preparation for future patches
which allow direct upcall processing from dpif-netdev.  The main goals
are to share as much code as possible between the dpif-linux and
dpif-netdev upcall paths.  Additionally, to avoid confusing the
dpif-netdev fast path, the packet processing path should treat packets
and struct flow's as const.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 18:49:44 -07:00
+								        goto exit;
 								    }
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    xoutp = &ctx.xout;
-												ofproto-dpif-upcall: Fix a memory leak.

The "key" member in struct flow_miss refers to memory held by the "struct
upcall", hence the upcalls should be freed only after the flow misses are
processed by the main thread.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-23 10:57:22 -07:00
-												ofp-actions: Add limit to learn action.

This commit adds a new feature to the learn actions: the possibility to
limit the number of learned flows.

To be compatible with users of the old learn action, a new structure is
introduced as well as a new OpenFlow raw action number.

There's a small corner case when we have to delete the ukey.  This
happens when:
* The learned rule has expired (or has been deleted).
* The ukey that learned the rule is still in the datapath.
* No packets hit the datapath flow recently.
In this case we cannot relearn the rule (because there are no new
packets), and the actions might depend on the learn execution, so the
only option is to delete the ukey.  I don't think this has big
performance implications since it's done only for ukey with no traffic.

We could also slowpath it, but that will cause an action upcall and the
correct datapath actions will be installed later by a revalidator.  If
we delete the ukey, the next upcall will be a miss upcall and that will
immediately install the correct datapath flow.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-10 15:44:40 -08:00
+								    if (xoutp->avoid_caching) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								        *del_reason = FDR_AVOID_CACHING;
-												ofp-actions: Add limit to learn action.

This commit adds a new feature to the learn actions: the possibility to
limit the number of learned flows.

To be compatible with users of the old learn action, a new structure is
introduced as well as a new OpenFlow raw action number.

There's a small corner case when we have to delete the ukey.  This
happens when:
* The learned rule has expired (or has been deleted).
* The ukey that learned the rule is still in the datapath.
* No packets hit the datapath flow recently.
In this case we cannot relearn the rule (because there are no new
packets), and the actions might depend on the learn execution, so the
only option is to delete the ukey.  I don't think this has big
performance implications since it's done only for ukey with no traffic.

We could also slowpath it, but that will cause an action upcall and the
correct datapath actions will be installed later by a revalidator.  If
we delete the ukey, the next upcall will be a miss upcall and that will
immediately install the correct datapath flow.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-03-10 15:44:40 -08:00
+								        goto exit;
 								    }
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    if (xoutp->slow) {
-												ofproto: Meter slowpath action when action upcall meters are configured

If a slow path action is a controller action, meter it when the
controller meter is configured.  For other kinds of slow path actions,
meter it when the slowpath meter is configured.

Note, this patch only considers the meters configuration of the
packet's input bridge, which may not be the same bridge that the
action is generated.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-04-11 16:10:41 -07:00
+								        struct ofproto_dpif *ofproto;
-												ofproto: Add 'ofproto_uuid' and 'ofp_in_port' to user action cookie.

Previously, the ofproto instance and OpenFlow port have been derived
based on the datapath port number.  This change explicitly declares them
both, which will be helpful in future commits that no longer can depend
on having a unique datapath port (e.g., a source port that represents
the controller).

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-10-03 17:31:34 -07:00
+								        ofp_port_t ofp_in_port;
-												odp-util: Improve log messages and error reporting for Netlink parsing.

As a side effect, this also reduces a lot of log messages' severities from
ERR to WARN.  They just didn't seem like messages that in general reported
anything that would prevent functioning.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-12-14 18:16:55 -08:00
+								        ofproto = xlate_lookup_ofproto(udpif->backer, &ctx.flow, &ofp_in_port,
 								                                       NULL);
-												ofproto: Meter slowpath action when action upcall meters are configured

If a slow path action is a controller action, meter it when the
controller meter is configured.  For other kinds of slow path actions,
meter it when the slowpath meter is configured.

Note, this patch only considers the meters configuration of the
packet's input bridge, which may not be the same bridge that the
action is generated.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-04-11 16:10:41 -07:00
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								        ofpbuf_clear(odp_actions);
-												ofproto-dpif-upcall: fix for segmentation fault

Added check for NULL pointer on return from xlate_lookup_ofproto
function. Access to "ofproto" variable when NULL was causing segmentation
fault.

VMware-BZ: #2061914
CC: Justin Pettit <jpettit@ovn.org>
Fixes: d39ec23de384 ("ofproto-dpif: Don't slow-path controller actions.")
Signed-off-by: Ashish Varma <ashishvarma.ovs@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-03-05 15:04:01 -08:00
 								        if (!ofproto) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								            *del_reason = FDR_NO_OFPROTO;
-												ofproto-dpif-upcall: fix for segmentation fault

Added check for NULL pointer on return from xlate_lookup_ofproto
function. Access to "ofproto" variable when NULL was causing segmentation
fault.

VMware-BZ: #2061914
CC: Justin Pettit <jpettit@ovn.org>
Fixes: d39ec23de384 ("ofproto-dpif: Don't slow-path controller actions.")
Signed-off-by: Ashish Varma <ashishvarma.ovs@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-03-05 15:04:01 -08:00
+								            goto exit;
 								        }
-												dpif: Remove support for multiple queues per port.

Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
sockets") removed dpif-netlink support for multiple queues per port.
No remaining dpif provider supports multiple queues per port, so
remove infrastructure for the feature.

CC: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-09-25 15:14:13 -07:00
+								        compose_slow_path(udpif, xoutp, ctx.flow.in_port.odp_port,
-												ofproto-dpif: Don't slow-path controller actions.

Controller actions have become more commonly used for purposes other
than just making forwarding decisions (e.g., packet logging).  A packet
that needs to be copied to the controller and forwarded would always be
sent to ovs-vswitchd to be handled, which could negatively affect
performance and cause heavier CPU utilization in ovs-vswitchd.

This commit changes the behavior so that OpenFlow controller actions
become userspace datapath actions while continuing to let packet
forwarding and manipulation continue to be handled by the datapath
directly.

This patch still slow-paths controller actions with the "pause" flag
set.  A future patch will stop slow-pathing these pause actions as
well.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-07-05 15:17:52 -07:00
+								                          ofp_in_port, odp_actions,
 								                          ofproto->up.slowpath_meter_id, &ofproto->uuid);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    }
-												odp-util: Improve log messages and error reporting for Netlink parsing.

As a side effect, this also reduces a lot of log messages' severities from
ERR to WARN.  They just didn't seem like messages that in general reported
anything that would prevent functioning.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-12-14 18:16:55 -08:00
+								    if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, &dp_mask, &ctx.flow,
 								                             NULL)
-												ofproto-dpif-upcall: Use flow_wildcards_has_extra().

Update the comment in ukey_revalidate() to reflect the fact that the
mask in ukey is not the datapath mask, but the originally translated
flow wildcards.

Use flow_wildcards_has_extra() instead of open coding equivalent (but
different) functionality.  The old form and the code in
flow_wildcards_has_extra() ((dp | wc != dp) and (dp & wc != wc),
respecively) give the same result:

dp   wc    (dp | wc != dp)        (dp & wc != wc)
-------------------------------------------------------
0    0      (0 | 0 != 0) (false)   (0 & 0 != 0) (false)
0    1      (0 | 1 != 0) (true)    (0 & 1 != 1) (true)
1    0      (1 | 0 != 1) (false)   (1 & 0 != 0) (false)
1    1      (1 | 1 != 1) (false)   (1 & 1 != 1) (false)

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-09-29 14:21:33 -07:00
+								        == ODP_FIT_ERROR) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								        *del_reason = FDR_BAD_ODP_FIT;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        goto exit;
 								    }
-												ofproto-dpif-upcall: Use flow_wildcards_has_extra().

Update the comment in ukey_revalidate() to reflect the fact that the
mask in ukey is not the datapath mask, but the originally translated
flow wildcards.

Use flow_wildcards_has_extra() instead of open coding equivalent (but
different) functionality.  The old form and the code in
flow_wildcards_has_extra() ((dp | wc != dp) and (dp & wc != wc),
respectively) give the same result:

dp   wc    (dp | wc != dp)        (dp & wc != wc)
-------------------------------------------------------
0    0      (0 | 0 != 0) (false)   (0 & 0 != 0) (false)
0    1      (0 | 1 != 0) (true)    (0 & 1 != 1) (true)
1    0      (1 | 0 != 1) (false)   (1 & 0 != 0) (false)
1    1      (1 | 1 != 1) (false)   (1 & 1 != 1) (false)

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-09-29 14:21:33 -07:00
+								    /* Do not modify if any bit is wildcarded by the installed datapath flow,
 								     * but not the newly revalidated wildcard mask (wc), i.e., if revalidation
 								     * tells that the datapath flow is now too generic and must be narrowed
 								     * down.  Note that we do not know if the datapath has ignored any of the
-												ofproto-dpif-upcall: Fix typo in comment.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2018-01-24 09:47:23 -08:00
+								     * wildcarded bits, so we may be overly conservative here. */
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								    if (flow_wildcards_has_extra(&dp_mask, ctx.wc)) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								        *del_reason = FDR_FLOW_WILDCARDED;
-												ofproto-dpif-upcall: Use flow_wildcards_has_extra().

Update the comment in ukey_revalidate() to reflect the fact that the
mask in ukey is not the datapath mask, but the originally translated
flow wildcards.

Use flow_wildcards_has_extra() instead of open coding equivalent (but
different) functionality.  The old form and the code in
flow_wildcards_has_extra() ((dp | wc != dp) and (dp & wc != wc),
respectively) give the same result:

dp   wc    (dp | wc != dp)        (dp & wc != wc)
-------------------------------------------------------
0    0      (0 | 0 != 0) (false)   (0 & 0 != 0) (false)
0    1      (0 | 1 != 0) (true)    (0 & 1 != 1) (true)
1    0      (1 | 0 != 1) (false)   (1 & 0 != 0) (false)
1    1      (1 | 1 != 1) (false)   (1 & 1 != 1) (false)

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-09-29 14:21:33 -07:00
+								        goto exit;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    }
-												upcall: Revalidate using cache of mask, actions.

This allows us to ignore most fields of a flow_dump, requiring only the
flow key for looking up the ukey. Fetching flows can also be avoided in
the corner case where a flow is missed from a dump but revalidation is
required.

A future patch will modify the datapath interface to allow datapaths to
skip dumping these fields, so this cache will be used instead.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-06 16:40:37 +12:00
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    if (!ofpbuf_equal(odp_actions,
 								                      ovsrcu_get(struct ofpbuf *, &ukey->actions))) {
 								        /* The datapath mask was OK, but the actions seem to have changed.
 								         * Let's modify it in place. */
 								        result = UKEY_MODIFY;
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								        /* Transfer recirc action ID references to the caller. */
 								        recirc_refs_swap(recircs, &xoutp->recircs);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								        goto exit;
 								    }
 								    result = UKEY_KEEP;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
 								exit:
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    if (netflow && result == UKEY_DELETE) {
-												revalidator: Refactor ukey->xout translation.

This patch shifts the code that directly calls xlate into a separate
function, xlate_ukey().

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 12:33:51 -07:00
+								        netflow_flow_clear(netflow, &ctx.flow);
-												ofproto-dpif-upcall: Expire netflow flow when revalidate_ukey failed

This fixes missing netflow flows in
"ofproto-dpif - NetFlow flow expiration" tests.

Acked-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: YAMAMOTO Takashi <yamamoto@valinux.co.jp>

											
										
										
											2014-04-01 21:21:45 +09:00
+								    }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    xlate_out_uninit(xoutp);
-												utilities: Add revalidator measurement script and needed USDT probes.

This patch adds a Python script that can be used to analyze the
revalidator runs by providing statistics (including some real time
graphs).

The USDT events can also be captured to a file and used for
later offline analysis.

The following blog explains the Open vSwitch revalidator
implementation and how this tool can help you understand what is
happening in your system.

https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-23 12:03:29 +01:00
 								    OVS_USDT_PROBE(revalidate_ukey__, exit, udpif, ukey, result);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    return result;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								}
-												ofproto-dpif-upcall: Don't set statistics to 0 when they jump back.

The only way that stats->{n_packets,n_bytes} would decrease is due to an
overflow, or if there are bugs in how statistics are handled. In the
past, there were multiple issues that caused a jump backward. A
workaround was in place to set the statistics to 0 in that case. When
this happened while the revalidator was under heavy load, the workaround
had an unintended side effect where should_revalidate returned false
causing the flow to be removed because the metric it calculated was
based on a bogus value. Since many of those bugs have now been
identified and resolved, there is no need to set the statistics to 0. In
addition, the (unlikely) overflow still needs to be handled
appropriately. If an unexpected jump does happen, just log it as a
warning.

Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-26 14:03:38 +02:00
+								static void
 								log_unexpected_stats_jump(struct udpif_key *ukey,
 								                          const struct dpif_flow_stats *stats)
 								    OVS_REQUIRES(ukey->mutex)
 								{
 								    /* Emits a warning, rate-limited through 'rll', reporting that the
 								     * packet counter in 'stats' differs unexpectedly from the value
 								     * cached in 'ukey->stats'.  The message identifies the ukey by its
 								     * UFID, its flow key and its current actions.
 								     *
 								     * The function is annotated as requiring 'ukey->mutex'; it only
 								     * reads from 'ukey' and 'stats'. */
 								    static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
 								    struct ds ds = DS_EMPTY_INITIALIZER;
 								    struct ofpbuf *actions;
 								    /* Build the ukey description in 'ds' as
 								     * "<ufid>, <flow key>, actions:<actions>". */
 								    odp_format_ufid(&ukey->ufid, &ds);
 								    ds_put_cstr(&ds, ", ");
 								    odp_flow_key_format(ukey->key, ukey->key_len, &ds);
 								    ds_put_cstr(&ds, ", actions:");
 								    /* The actions buffer is fetched via ovsrcu_get() and dereferenced
 								     * without a NULL check.
 								     * NOTE(review): presumably a ukey always has actions set -- confirm. */
 								    actions = ovsrcu_get(struct ofpbuf *, &ukey->actions);
 								    format_odp_actions(&ds, actions->data, actions->size, NULL);
 								    /* "from" is the previously cached packet count, "to" is the newly
 								     * reported one. */
 								    VLOG_WARN_RL(&rll, "Unexpected jump in packet stats from %"PRIu64
 								                 " to %"PRIu64" when handling ukey %s",
 								                 ukey->stats.n_packets, stats->n_packets, ds_cstr(&ds));
 								    /* Done with the temporary description string. */
 								    ds_destroy(&ds);
 								}
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								/* Verifies that the datapath actions of 'ukey' are still correct, and pushes
 								 * 'stats' for it.
 								 *
 								 * Returns a recommended action for 'ukey', options include:
 								 *      UKEY_DELETE The ukey should be deleted.
 								 *      UKEY_KEEP   The ukey is fine as is.
 								 *      UKEY_MODIFY The ukey's actions should be changed but is otherwise
 								 *                  fine.  Callers should change the actions to those found
 								 *                  in the caller supplied 'odp_actions' buffer.  The
 								 *                  recirculation references can be found in 'recircs' and
 								 *                  must be handled by the caller.
 								 *
 								 * If the result is UKEY_MODIFY, then references to all recirc_ids used by the
 								 * new flow will be held within 'recircs' (which may be none).
 								 *
 								 * The caller is responsible for both initializing 'recircs' prior to this
 								 * call, and ensuring any references are eventually freed.
 								 */
 								static enum reval_result
 								revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey,
 								                const struct dpif_flow_stats *stats,
 								                struct ofpbuf *odp_actions, uint64_t reval_seq,
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                struct recirc_refs *recircs, enum flow_del_reason *del_reason)
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								    OVS_REQUIRES(ukey->mutex)
 								{
 								    bool need_revalidate = ukey->reval_seq != reval_seq;
 								    enum reval_result result = UKEY_DELETE;
 								    struct dpif_flow_stats push;
 								    ofpbuf_clear(odp_actions);
 								    push.used = stats->used;
 								    push.tcp_flags = stats->tcp_flags;
-												ofproto-dpif-upcall: Don't set statistics to 0 when they jump back.

The only way that stats->{n_packets,n_bytes} would decrease is due to an
overflow, or if there are bugs in how statistics are handled. In the
past, there were multiple issues that caused a jump backward. A
workaround was in place to set the statistics to 0 in that case. When
this happened while the revalidator was under heavy load, the workaround
had an unintended side effect where should_revalidate returned false
causing the flow to be removed because the metric it calculated was
based on a bogus value. Since many of those bugs have now been
identified and resolved, there is no need to set the statistics to 0. In
addition, the (unlikely) overflow still needs to be handled
appropriately. If an unexpected jump does happen, just log it as a
warning.

Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-26 14:03:38 +02:00
+								    push.n_packets = stats->n_packets - ukey->stats.n_packets;
 								    push.n_bytes = stats->n_bytes - ukey->stats.n_bytes;
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								    if (stats->n_packets < ukey->stats.n_packets &&
 								        ukey->stats.n_packets < UINT64_THREE_QUARTERS) {
 								        /* Report cases where the packet counter is lower than the previous
 								         * instance, but exclude the potential wrapping of an uint64_t. */
 								        COVERAGE_INC(ukey_invalid_stat_reset);
-												ofproto-dpif-upcall: Don't set statistics to 0 when they jump back.

The only way that stats->{n_packets,n_bytes} would decrease is due to an
overflow, or if there are bugs in how statistics are handled. In the
past, there were multiple issues that caused a jump backward. A
workaround was in place to set the statistics to 0 in that case. When
this happened while the revalidator was under heavy load, the workaround
had an unintended side effect where should_revalidate returned false
causing the flow to be removed because the metric it calculated was
based on a bogus value. Since many of those bugs have now been
identified and resolved, there is no need to set the statistics to 0. In
addition, the (unlikely) overflow still needs to be handled
appropriately. If an unexpected jump does happen, just log it as a
warning.

Signed-off-by: Balazs Nemeth <bnemeth@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-26 14:03:38 +02:00
+								        log_unexpected_stats_jump(ukey, stats);
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								    }
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
+								    if (need_revalidate) {
-												ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps.

Depending on the driver implementation, it can take from 0.2 seconds
up to 2 seconds before offloaded flow statistics are updated. This is
true for both TC and rte_flow-based offloading. This is causing a
problem with min-revalidate-pps, as old statistic values are used
during this period.

This fix will wait for at least 2 seconds, by default, before assuming no
packets were received during this period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-08 13:55:44 +01:00
+								        if (should_revalidate(udpif, ukey, push.n_packets)) {
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
+								            if (!ukey->xcache) {
 								                ukey->xcache = xlate_cache_new();
 								            } else {
 								                xlate_cache_clear(ukey->xcache);
 								            }
 								            result = revalidate_ukey__(udpif, ukey, push.tcp_flags,
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                                       odp_actions, recircs, ukey->xcache,
 								                                       del_reason);
 								        } else {
 								            /* Delete, since it is too expensive to revalidate. */
 								            *del_reason = FDR_TOO_EXPENSIVE;
 								        }
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
+								    } else if (!push.n_packets || ukey->xcache
 								               || !populate_xcache(udpif, ukey, push.tcp_flags)) {
 								        result = UKEY_KEEP;
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								    }
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								    /* Stats for deleted flows will be attributed upon flow deletion. Skip. */
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								    if (result != UKEY_DELETE) {
-												ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps.

Depending on the driver implementation, it can take from 0.2 seconds
up to 2 seconds before offloaded flow statistics are updated. This is
true for both TC and rte_flow-based offloading. This is causing a
problem with min-revalidate-pps, as old statistic values are used
during this period.

This fix will wait for at least 2 seconds, by default, before assuming no
packets were received during this period.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-08 13:55:44 +01:00
+								        xlate_push_stats(ukey->xcache, &push, ukey->offloaded);
-												revalidator: Defer stats push to end of validation.

To make more of the core revalidate() functions do just one thing and
not modify state on the way, refactor them to prepare the xcache then
defer the ukey modification and stats/side effects execution to the end
of successful revalidation.

If revalidation causes deletion, then the xcache will be prepared and
attached to the ukey, but the actual execution will be skipped since it
will be executed on flow_delete very soon anyway with final stats.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:08:21 -07:00
+								        ukey->stats = *stats;
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								        ukey->reval_seq = reval_seq;
 								    }
-												revalidator: Simplify full-revalidation code.

Simplify the remaining bits of the original revalidation codepath to
only handle the "full-revalidation" case. Make the 'ukey' parameter
purely const by pushing the only piece that gets changed into a separate
argument.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 14:58:00 -07:00
-												revalidator: Refactor revalidation early exit.

Shift the early-exit conditions for revalidation into a separate
function.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 13:13:04 -07:00
+								    return result;
 								}
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								static void
-												dpif: Shift ufid support checking up to dpif_backer.

Previously, the dpif layer was responsible for determining datapath
support for UFIDs, which resulted in all ovs-dpctl utilities
inserting/deleting flows from the datapath each time they are run.
Shift this responsibility up to the dpif_backer.

There are two users of this functionality: Revalidators check for UFID
support to request a terser dump using UFIDs, and dpif-netlink uses this
to request flow_del operations to only return the UFID/stats. The latter
case was previously hidden from revalidators, but this change makes them
aware of it, and reuses the same "udpif->enable_ufid" flag for reducing
overhead of both flow dump and flow delete.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-12-16 17:44:40 -08:00
+								delete_op_init__(struct udpif *udpif, struct ukey_op *op,
 								                 const struct dpif_flow *flow)
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								{
-												revalidator: Fix access of uninitialized memory.

Commit 64bb477 "dpif: Minimize memory copy for revalidation." introduced
a bug where the corner case of ukey creation (in revalidator threads)
could result in access to uninitialized memory when deleting flows from
the datapath. This could result in OVS aborting or deadlock. Fix it.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-08 17:14:39 -08:00
+								    op->ukey = NULL;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    op->dop.type = DPIF_OP_FLOW_DEL;
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								    op->dop.flow_del.key = flow->key;
 								    op->dop.flow_del.key_len = flow->key_len;
 								    op->dop.flow_del.ufid = flow->ufid_present ? &flow->ufid : NULL;
 								    op->dop.flow_del.pmd_id = flow->pmd_id;
 								    op->dop.flow_del.stats = &op->stats;
 								    op->dop.flow_del.terse = udpif_use_ufid(udpif);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void
-												dpif: Shift ufid support checking up to dpif_backer.

Previously, the dpif layer was responsible for determining datapath
support for UFIDs, which resulted in all ovs-dpctl utilities
inserting/deleting flows from the datapath each time they are run.
Shift this responsibility up to the dpif_backer.

There are two users of this functionality: Revalidators check for UFID
support to request a terser dump using UFIDs, and dpif-netlink uses this
to request flow_del operations to only return the UFID/stats. The latter
case was previously hidden from revalidators, but this change makes them
aware of it, and reuses the same "udpif->enable_ufid" flag for reducing
overhead of both flow dump and flow delete.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-12-16 17:44:40 -08:00
+								delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey)
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								{
 								    op->ukey = ukey;
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								    op->dop.type = DPIF_OP_FLOW_DEL;
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								    op->dop.flow_del.key = ukey->key;
 								    op->dop.flow_del.key_len = ukey->key_len;
 								    op->dop.flow_del.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
 								    op->dop.flow_del.pmd_id = ukey->pmd_id;
 								    op->dop.flow_del.stats = &op->stats;
 								    op->dop.flow_del.terse = udpif_use_ufid(udpif);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								}
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								static void
-												upcall: Reuse flow_put initializer.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:02 -07:00
+								put_op_init(struct ukey_op *op, struct udpif_key *ukey,
 								            enum dpif_flow_put_flags flags)
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								{
 								    op->ukey = ukey;
 								    op->dop.type = DPIF_OP_FLOW_PUT;
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								    op->dop.flow_put.flags = flags;
 								    op->dop.flow_put.key = ukey->key;
 								    op->dop.flow_put.key_len = ukey->key_len;
 								    op->dop.flow_put.mask = ukey->mask;
 								    op->dop.flow_put.mask_len = ukey->mask_len;
 								    op->dop.flow_put.ufid = ukey->ufid_present ? &ukey->ufid : NULL;
 								    op->dop.flow_put.pmd_id = ukey->pmd_id;
 								    op->dop.flow_put.stats = NULL;
 								    ukey_get_actions(ukey, &op->dop.flow_put.actions,
 								                     &op->dop.flow_put.actions_len);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								}
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								/* Executes datapath operations 'ops' and attributes stats retrieved from the
 								 * datapath as part of those operations. */
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								static void
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    struct dpif_op *opsp[REVALIDATE_MAX_BATCH];
 								    size_t i;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    ovs_assert(n_ops <= REVALIDATE_MAX_BATCH);
 								    for (i = 0; i < n_ops; i++) {
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								        opsp[i] = &ops[i].dop;
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    }
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    dpif_operate(udpif->dpif, opsp, n_ops, DPIF_OFFLOAD_AUTO);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
 								    for (i = 0; i < n_ops; i++) {
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								        struct ukey_op *op = &ops[i];
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
-												ofproto-dpif-upcall: Do not attribute stats when flow_del returns error.

In push_ukey_ops__(), when a flow_del operation returns an error, the 'struct
stats' passed to the operation function will be set to all zeros, and we
should not use it to calculate the delta (i.e. subtracting the cached stats
from the zeroed stats causes overflow).

Even though this should rarely happen, it is still good to make
push_ukey_ops__() just ignore the operation when it fails.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2015-08-28 05:05:07 +00:00
+								        if (op->dop.error) {
-												ofproto-dpif-upcall: Transition ukey on dp_ops error.

In most situations, we don't expect that a flow we've successfully
dumped, which we intend to delete, cannot be deleted. However, to make
this code more resilient to ensure that ukeys *will* transition in all
cases (including an error at this stage), grab the lock and transition
this ukey forward to the evicted state, effectively treating a failure
to delete as "this flow is already gone".

If we subsequently find out that it wasn't deleted, then that's ok - we
will re-dump, and validate at that stage, which should lead to creating
a new ukey or deleting the datapath flow when that happens.

Signed-off-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>

											
										
										
											2017-09-06 15:12:52 -07:00
+								            if (op->ukey) {
 								                ovs_mutex_lock(&op->ukey->mutex);
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								                if (op->dop.type == DPIF_OP_FLOW_DEL) {
 								                    transition_ukey(op->ukey, UKEY_EVICTED);
 								                } else {
 								                    /* Modification of the flow failed. */
 								                    transition_ukey(op->ukey, UKEY_INCONSISTENT);
 								                }
-												ofproto-dpif-upcall: Transition ukey on dp_ops error.

In most situations, we don't expect that a flow we've successfully
dumped, which we intend to delete, cannot be deleted. However, to make
this code more resilient to ensure that ukeys *will* transition in all
cases (including an error at this stage), grab the lock and transition
this ukey forward to the evicted state, effectively treating a failure
to delete as "this flow is already gone".

If we subsequently find out that it wasn't deleted, then that's ok - we
will re-dump, and validate at that stage, which should lead to creating
a new ukey or deleting the datapath flow when that happens.

Signed-off-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>

											
										
										
											2017-09-06 15:12:52 -07:00
+								                ovs_mutex_unlock(&op->ukey->mutex);
 								            }
-												ofproto-dpif-upcall: Do not attribute stats when flow_del returns error.

In push_ukey_ops__(), when a flow_del operation returns an error, the 'struct
stats' passed to the operation function will be set to all zeros, and we
should not use it to calculate the delta (i.e. subtracting the cached stats
from the zeroed stats causes overflow).

Even though this should rarely happen, it is still good to make
push_ukey_ops__() just ignore the operation when it fails.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2015-08-28 05:05:07 +00:00
+								            continue;
 								        }
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								        if (op->dop.type != DPIF_OP_FLOW_DEL) {
 								            /* Only deleted flows need their stats pushed. */
 								            continue;
 								        }
 								        struct dpif_flow_stats *push, *stats, push_buf;
 								        stats = op->dop.flow_del.stats;
 								        push = &push_buf;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        if (op->ukey) {
 								            ovs_mutex_lock(&op->ukey->mutex);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            transition_ukey(op->ukey, UKEY_EVICTED);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            push->used = MAX(stats->used, op->ukey->stats.used);
 								            push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags;
 								            push->n_packets = stats->n_packets - op->ukey->stats.n_packets;
 								            push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes;
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
 								            if (stats->n_packets < op->ukey->stats.n_packets &&
 								                op->ukey->stats.n_packets < UINT64_THREE_QUARTERS) {
 								                /* Report cases where the packet counter is lower than the
 								                 * previous instance, but exclude the potential wrapping of an
 								                 * uint64_t. */
 								                COVERAGE_INC(ukey_invalid_stat_reset);
 								            }
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            ovs_mutex_unlock(&op->ukey->mutex);
 								        } else {
 								            push = stats;
 								        }
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
 								        if (push->n_packets || netflow_exists()) {
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								            const struct nlattr *key = op->dop.flow_del.key;
 								            size_t key_len = op->dop.flow_del.key_len;
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								            struct netflow *netflow;
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								            struct reval_context ctx = {
 								                .netflow = &netflow,
 								            };
-												revalidator: Simplify push_dump_ops__().

Commit acaa8dac49 (revalidator: Eliminate duplicate flow handling.)
ensured that a ukey will always exist for a given flow, even if it is
about to be deleted. This means that push_dump_ops__() no longer needs
to handle the case where there is no ukey. This commit removes the
redundant code.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-01 09:54:18 +00:00
+								            int error;
-												ofproto-dpif-xlate: Cache xlate_actions() effects.

This patch adds a new object called 'struct xlate_cache' which can be
set in 'struct xlate_in', and passed to xlate_actions() to cache the
modules affected by this flow translation. Subsequently, the caller can
pass the xcache to xlate_push_stats() to credit stats and perform side
effects for a lower cost than full flow translation.

These changes are aimed currently at long-lived flows, decreasing the
average dump duration for such flows by 50-80%. This allows more flows
to be supported in the datapath at a given time. Applying these changes
to short-lived flows is left for a later commit.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Add caching for fin_timeout action.
    Expire netflows on xlate_cache_clear().
    Account to bonds using a copy of 'flow' rather than hash.
    Always build XC_NORMAL entry (previously only if may_learn is true)
    Rename xlate_from_cache()->xlate_push_stats()
    Add may_learn parameter to xlate_push_stats()
    Tidy up xlate_actions__() mirror/netflow code.
    Fold in style fixups.
RFC: First post.

											
										
										
											2014-04-10 16:00:28 +12:00
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            if (op->ukey) {
 								                ovs_mutex_lock(&op->ukey->mutex);
 								                if (op->ukey->xcache) {
-												Add offload packets statistics

Add argument '--offload-stats' for command ovs-appctl bridge/dump-flows
to display the offloaded packets statistics.

The commands display as below:

original command:

ovs-appctl bridge/dump-flows br0

duration=574s, n_packets=1152, n_bytes=110768, priority=0,actions=NORMAL
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x3,actions=drop

new command with argument '--offload-stats'

Notice: 'n_offload_packets' is a subset of n_packets and 'n_offload_bytes' is
a subset of n_bytes.

ovs-appctl bridge/dump-flows --offload-stats br0

duration=582s, n_packets=1152, n_bytes=110768, n_offload_packets=1107, n_offload_bytes=107992, priority=0,actions=NORMAL
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x3,actions=drop

Signed-off-by: zhaozhanxu <zhaozhanxu@163.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2019-12-05 14:26:25 +08:00
+								                    xlate_push_stats(op->ukey->xcache, push, false);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                    ovs_mutex_unlock(&op->ukey->mutex);
 								                    continue;
 								                }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								                ovs_mutex_unlock(&op->ukey->mutex);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                key = op->ukey->key;
 								                key_len = op->ukey->key_len;
-												ofproto-dpif-xlate: Cache xlate_actions() effects.

This patch adds a new object called 'struct xlate_cache' which can be
set in 'struct xlate_in', and passed to xlate_actions() to cache the
modules affected by this flow translation. Subsequently, the caller can
pass the xcache to xlate_push_stats() to credit stats and perform side
effects for a lower cost than full flow translation.

These changes are aimed currently at long-lived flows, decreasing the
average dump duration for such flows by 50-80%. This allows more flows
to be supported in the datapath at a given time. Applying these changes
to short-lived flows is left for a later commit.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
---
v2: Acked.
v1: Add caching for fin_timeout action.
    Expire netflows on xlate_cache_clear().
    Account to bonds using a copy of 'flow' rather than hash.
    Always build XC_NORMAL entry (previously only if may_learn is true)
    Rename xlate_from_cache()->xlate_push_stats()
    Add may_learn parameter to xlate_push_stats()
    Tidy up xlate_actions__() mirror/netflow code.
    Fold in style fixups.
RFC: First post.

											
										
										
											2014-04-10 16:00:28 +12:00
+								            }
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								            error = xlate_key(udpif, key, key_len, push, &ctx);
 								            if (error) {
-												Don't shadow global VLOG "rl" definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 10:44:13 -08:00
+								                static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
 								                VLOG_WARN_RL(&rll, "xlate_key failed (%s)!",
-												revalidator: Fix logging of xlate_key() failure.

This was being logged using xlate_strerror(), but the return code is
actually an errno code. Use ovs_strerror() instead.

Fixes: dd0dc9eda0e0 ("revalidator: Reuse xlate_ukey from deletion.")
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2017-05-01 12:58:07 -07:00
+								                             ovs_strerror(error));
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								            } else {
 								                xlate_out_uninit(&ctx.xout);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								                if (netflow) {
-												revalidator: Reuse xlate_ukey from deletion.

This code was already very similar to the actual revalidation code, but
previously it wasn't structured quite closely enough to share it. Do so.

xlate_actions_for_side_effects() is now unused, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-09-20 16:41:31 -07:00
+								                    netflow_flow_clear(netflow, &ctx.flow);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								                }
 								            }
 								        }
 								    }
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								}
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								/* Executes datapath operations 'ops', attributes stats retrieved from the
 								 * datapath, and deletes ukeys corresponding to deleted flows. */
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								static void
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								push_ukey_ops(struct udpif *udpif, struct umap *umap,
 								              struct ukey_op *ops, size_t n_ops)
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								{
 								    int i;
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								    push_dp_ops(udpif, ops, n_ops);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    ovs_mutex_lock(&umap->mutex);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								    for (i = 0; i < n_ops; i++) {
-												ofproto-dpif-upcall: Don't delete modified ukeys.

If revalidation returns the result UKEY_DELETE, then both the ukey and
its corresponding flow should be deleted. However, if revalidation
returns UKEY_MODIFY, the ukey itself should be modified in-place and
should not be deleted.

Fix this by only applying the ukey deletion to ukeys whose datapath
operations delete a flow.

This may fix statistics accounting issues in rare cases involving
OpenFlow rule modification where actions are updated but flows remain
the same.

Found by inspection.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:46 -08:00
+								        if (ops[i].dop.type == DPIF_OP_FLOW_DEL) {
 								            ukey_delete(umap, ops[i].ukey);
 								        }
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    }
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    ovs_mutex_unlock(&umap->mutex);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								}
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								static void
 								log_unexpected_flow(const struct dpif_flow *flow, int error)
 								{
 								    struct ds ds = DS_EMPTY_INITIALIZER;
 								    ds_put_format(&ds, "Failed to acquire udpif_key corresponding to "
 								                  "unexpected flow (%s): ", ovs_strerror(error));
 								    odp_format_ufid(&flow->ufid, &ds);
-												Don't shadow global VLOG "rl" definition.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-02-27 10:44:13 -08:00
 								    static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(10, 60);
 								    VLOG_WARN_RL(&rll, "%s", ds_cstr(&ds));
-												ofproto-dpif-upcall: Prevent memory leak on log message.

When DPIF does not support UFID (like old kernels), it may print this
message quite frequently, if using an OVS version that does not include
the upstream fix af50de800ecb ("ofproto-dpif-upcall: Pass key to
dpif_flow_get().").

Fixes: 64bb477f0568 ("dpif: Minimize memory copy for revalidation.")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-06-08 13:04:11 -03:00
+								    ds_destroy(&ds);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								}
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								static void
 								reval_op_init(struct ukey_op *op, enum reval_result result,
 								              struct udpif *udpif, struct udpif_key *ukey,
 								              struct recirc_refs *recircs, struct ofpbuf *odp_actions)
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    OVS_REQUIRES(ukey->mutex)
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								{
 								    if (result == UKEY_DELETE) {
 								        delete_op_init(udpif, op, ukey);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								        transition_ukey(ukey, UKEY_EVICTING);
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								    } else if (result == UKEY_MODIFY) {
 								        /* Store the new recircs. */
 								        recirc_refs_swap(&ukey->recircs, recircs);
 								        /* Release old recircs. */
 								        recirc_refs_unref(recircs);
 								        /* ukey->key_recirc_id remains, as the key is the same as before. */
 								        ukey_set_actions(ukey, odp_actions);
-												upcall: Reuse flow_put initializer.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:02 -07:00
+								        put_op_init(op, ukey, DPIF_FP_MODIFY);
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								    }
 								}
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								/* Releases the netdev reference cached in 'ukey->in_netdev', if there is
 								 * one, and resets the pointer to NULL.  Does nothing when no netdev is
 								 * cached. */
 								static void
 								ukey_netdev_unref(struct udpif_key *ukey)
 								{
 								    if (ukey->in_netdev) {
 								        netdev_close(ukey->in_netdev);
 								        ukey->in_netdev = NULL;
 								    }
 								}
 								/*
 								 * Given a udpif_key, find its input port (netdev) by parsing the flow key
 								 * attributes cached in the ukey; the dumped flow itself may not contain
 								 * them if it comes from a terse dump.  A tunnel key attribute that parses
 								 * successfully takes precedence over the in_port attribute.  The netdev
 								 * reference that is found is saved in ukey->in_netdev.
 								 */
 								static void
 								ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey)
 								{
-												netdev-offload: Use dpif type instead of class.

There is no real difference between the 'class' and 'type' in the
context of common lookup operations inside netdev-offload module
because it only checks the value of pointers without using the
value itself.  However, 'type' has some meaning and can be used by
offload providers on the initialization phase to check if this type
of Flow API in pair with the netdev type could be used in particular
datapath type.  For example, this is needed to check if Linux flow
API could be used for current tunneling vport because it could be
used only if tunneling vport belongs to system datapath, i.e. has
backing linux interface.

This is needed to unblock tunneling offloads in userspace datapath
with DPDK flow API.

Acked-by: Eli Britstein <elibr@mellanox.com>
Acked-by: Roni Bar Yanai <roniba@mellanox.com>
Acked-by: Ophir Munk <ophirmu@mellanox.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-07-08 06:38:29 +00:00
+								    const char *dpif_type_str = dpif_normalize_type(dpif_type(udpif->dpif));
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								    const struct nlattr *k;
 								    unsigned int left;
 								    /* Remove existing references to netdev */
 								    ukey_netdev_unref(ukey);
 								    /* Find the input port and get a reference to its netdev */
 								    NL_ATTR_FOR_EACH (k, left, ukey->key, ukey->key_len) {
 								        enum ovs_key_attr type = nl_attr_type(k);
 								        if (type == OVS_KEY_ATTR_IN_PORT) {
 								            ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(k),
-												netdev-offload: Use dpif type instead of class.

There is no real difference between the 'class' and 'type' in the
context of common lookup operations inside netdev-offload module
because it only checks the value of pointers without using the
value itself.  However, 'type' has some meaning and can be used by
offload providers on the initialization phase to check if this type
of Flow API in pair with the netdev type could be used in particular
datapath type.  For example, this is needed to check if Linux flow
API could be used for current tunneling vport because it could be
used only if tunneling vport belongs to system datapath, i.e. has
backing linux interface.

This is needed to unblock tunneling offloads in userspace datapath
with DPDK flow API.

Acked-by: Eli Britstein <elibr@mellanox.com>
Acked-by: Roni Bar Yanai <roniba@mellanox.com>
Acked-by: Ophir Munk <ophirmu@mellanox.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-07-08 06:38:29 +00:00
+								                                               dpif_type_str);
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								        } else if (type == OVS_KEY_ATTR_TUNNEL) {
 								            struct flow_tnl tnl;
 								            enum odp_key_fitness res;
 								            if (ukey->in_netdev) {
 								                netdev_close(ukey->in_netdev);
 								                ukey->in_netdev = NULL;
 								            }
-												odp-util: Improve log messages and error reporting for Netlink parsing.

As a side effect, this also reduces a lot of log messages' severities from
ERR to WARN.  They just didn't seem like messages that in general reported
anything that would prevent functioning.

Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-12-14 18:16:55 -08:00
+								            res = odp_tun_key_from_attr(k, &tnl, NULL);
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								            if (res != ODP_FIT_ERROR) {
 								                ukey->in_netdev = flow_get_tunnel_netdev(&tnl);
 								                break;
 								            }
 								        }
 								    }
 								}
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								/* Returns how many packets 'f' accounts for beyond what 'ukey' last
 								 * recorded: the packet count reported in 'f->stats' plus the ukey's
 								 * backlog, minus 'ukey->flow_packets'. */
 								static uint64_t
 								udpif_flow_packet_delta(struct udpif_key *ukey, const struct dpif_flow *f)
 								{
 								    uint64_t seen_now = f->stats.n_packets + ukey->flow_backlog_packets;

 								    return seen_now - ukey->flow_packets;
 								}
 								/* Returns the time, in whole seconds, between 'ukey->flow_time' and the
 								 * dpif's 'current_ms'.  Both values are in milliseconds; the fractional
 								 * part of the quotient is discarded by the integer division. */
 								static long long int
 								udpif_flow_time_delta(struct udpif *udpif, struct udpif_key *ukey)
 								{
 								    const int msec_per_sec = 1000;

 								    return (udpif->dpif->current_ms - ukey->flow_time) / msec_per_sec;
 								}
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								/*
 								 * Save backlog packet count while switching modes
 								 * between offloaded and kernel datapaths.
 								 */
 								static void
 								udpif_set_ukey_backlog_packets(struct udpif_key *ukey)
 								{
 								    /* Snapshot the packet count accumulated so far.  The backlog is
 								     * later added to the datapath-reported 'n_packets' when
 								     * udpif_flow_packet_delta() and udpif_update_flow_pps() compute
 								     * the flow's totals. */
 								    ukey->flow_backlog_packets = ukey->flow_packets;
 								}
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								/* Gather pps-rate for the given dpif_flow and save it in its ukey */
 								static void
 								udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey,
 								                      const struct dpif_flow *f)
 								{
 								    /* Recompute the rate only once at least OFFL_REBAL_INTVL_MSEC has
 								     * passed since 'ukey->flow_time'; before that 'ukey' is left
 								     * untouched. */
 								    if (udpif->dpif->current_ms - ukey->flow_time >= OFFL_REBAL_INTVL_MSEC) {
 								        uint64_t rate;

 								        ukey->offloaded = f->attrs.offloaded;

 								        /* Packets gained since the last sample, divided by the elapsed
 								         * whole seconds. */
 								        rate = udpif_flow_packet_delta(ukey, f)
 								               / udpif_flow_time_delta(udpif, ukey);
 								        ukey->flow_pps_rate = rate;

 								        /* Store the new baseline for the next sample. */
 								        ukey->flow_packets = ukey->flow_backlog_packets
 								                             + f->stats.n_packets;
 								        ukey->flow_time = udpif->dpif->current_ms;
 								    }
 								}
-												netdev-offload: Implement terse dump support

In order to improve revalidator performance by minimizing unnecessary
copying of data, extend netdev-offloads to support terse dump mode. Extend
netdev_flow_api->flow_dump_create() with 'terse' bool argument. Implement
support for terse dump in functions that convert netlink to flower and
flower to match. Set flow stats "used" value based on difference in number
of flow packets because lastuse timestamp is not included in TC terse dump.

Kernel API support is implemented in following patch.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2020-06-04 13:47:00 +03:00
+								/* Determines the "used" time for the flow described by 'ukey' and 'stats'.
 								 *
 								 * When the current dump is not terse, returns 'ukey->created' and leaves
 								 * 'stats' unmodified.
 								 *
 								 * When the dump is terse, fills in 'stats->used' and returns it:
 								 *   - the dpif's 'current_ms' if 'stats->n_packets' is greater than the
 								 *     packet count stored in 'ukey->stats';
 								 *   - otherwise the previous 'ukey->stats.used' if it is nonzero;
 								 *   - otherwise 'ukey->created'. */
 								static long long int
 								udpif_update_used(struct udpif *udpif, struct udpif_key *ukey,
 								                  struct dpif_flow_stats *stats)
 								    OVS_REQUIRES(ukey->mutex)
 								{
 								    if (udpif->dump->terse) {
 								        if (stats->n_packets > ukey->stats.n_packets) {
 								            /* The packet count grew, so treat the flow as used now. */
 								            stats->used = udpif->dpif->current_ms;
 								        } else {
 								            stats->used = ukey->stats.used ? ukey->stats.used
 								                                           : ukey->created;
 								        }
 								        return stats->used;
 								    }

 								    return ukey->created;
 								}
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								static void
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								revalidate(struct revalidator *revalidator)
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								{
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    uint64_t odp_actions_stub[1024 / 8];
 								    struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
-												upcall: Refactor ukey creation and dump handling

This splits out functions for re-use by later patches, and compacts the
udump revalidation code.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:34 -08:00
+								    struct udpif *udpif = revalidator->udpif;
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dpif_flow_dump_thread *dump_thread;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    uint64_t dump_seq, reval_seq;
-												ofproto-dpif-upcall: Log the emergency flow flush.

When the number of flows in the datapath reaches twice the
maximum, revalidators will delete all flows as an emergency
action to recover. In that case, log a message with values
and increase a coverage counter.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-09-30 16:23:59 -03:00
+								    bool kill_warn_print = true;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    unsigned int flow_limit;
-												revalidator: Replace ukey->mark with dump_seq.

Rather than setting and resetting the 'mark' field in the ukey, this
patch introduces a seq to track whether a flow has been seen during the
most recent dump. This tidies the code and simplifies the logic for
detecting when flows are duplicated from the datapath.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-14 16:17:25 +12:00
+								    dump_seq = seq_read(udpif->dump_seq);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    reval_seq = seq_read(udpif->reval_seq);
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    dump_thread = dpif_flow_dump_thread_create(udpif->dump);
 								    for (;;) {
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								        struct ukey_op ops[REVALIDATE_MAX_BATCH];
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        int n_ops = 0;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        struct dpif_flow flows[REVALIDATE_MAX_BATCH];
 								        const struct dpif_flow *f;
 								        int n_dumped;
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        long long int max_idle;
 								        long long int now;
-												ofproto-dpif-upcall: Log the emergency flow flush.

When the number of flows in the datapath reaches twice the
maximum, revalidators will delete all flows as an emergency
action to recover. In that case, log a message with values
and increase a coverage counter.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-09-30 16:23:59 -03:00
+								        size_t kill_all_limit;
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        size_t n_dp_flows;
 								        bool kill_them_all;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        n_dumped = dpif_flow_dump_next(dump_thread, flows, ARRAY_SIZE(flows));
 								        if (!n_dumped) {
 								            break;
-												revalidator: Prevent handling the same flow twice.

When the datapath flow table is modified while a flow dump operation is
in progress, it is possible for the same flow to be dumped twice. In
such cases, revalidators may perform redundant work, or attempt to
delete the same flow twice.

This was causing intermittent testsuite failures for test #670 -
"ofproto-dpif, active-backup bonding" where a flow (that had not
previously been dumped) was dumped, revalidated and deleted twice.

The logs show errors such as:
"failed to flow_get (No such file or directory) skb_priority(0),..."
"failed to flow_del (No such file or directory) skb_priority(0),..."

This patch adds a 'flow_exists' field to 'struct udpif_key' to track
whether the flow is (in progress) to be deleted. After doing a ukey
lookup, we check whether ukey->mark or ukey->flow indicates that the
flow has already been handled. If it has already been handled, we skip
handling the flow again.

We also defer ukey cleanup for flows that fail revalidation, so that the
ukey will still exist if the same flow is dumped twice. This allows the
above logic to work in this case.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Alex Wang <alexw@nicira.com>

											
										
										
											2014-04-23 15:31:17 +12:00
+								        }
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        /* In normal operation we want to keep flows around until they have
 								         * been idle for 'ofproto_max_idle' milliseconds.  However:
 								         *
 								         *     - If the number of datapath flows climbs above 'flow_limit',
 								         *       drop that down to 100 ms to try to bring the flows down to
 								         *       the limit.
 								         *
 								         *     - If the number of datapath flows climbs above twice
 								         *       'flow_limit', delete all the datapath flows as an emergency
 								         *       measure.  (We reassess this condition for the next batch of
 								         *       datapath flows, so we will recover before all the flows are
 								         *       gone.) */
 								        n_dp_flows = udpif_get_n_flows(udpif);
-												ofproto: report coverage on hitting datapath flow limit

Whenever the number of flows in the datapath crosses above
the flow limit set/autoconfigured, it is helpful to report
this event through coverage counter for an operator/devops
engineer to know and take proactive corrections in the
switch configuration.

Today, these events are reported in the ovs vswitch log when
a new flow cannot be inserted during upcall processing, in which
case ovs writes a warning; otherwise, ovs auto-corrects by
flushing old flows without any notification at all.

Signed-off-by: Gowrishankar Muthukrishnan <gmuthukr@redhat.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-20 19:13:42 +05:30
+								        if (n_dp_flows >= flow_limit) {
 								            COVERAGE_INC(upcall_flow_limit_hit);
 								        }
-												ofproto-dpif-upcall: Log the emergency flow flush.

When the number of flows in the datapath reaches twice the
maximum, revalidators will delete all flows as an emergency
action to recover. In that case, log a message with values
and increase a coverage counter.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-09-30 16:23:59 -03:00
+								        kill_them_all = false;
 								        kill_all_limit = flow_limit * 2;
 								        if (OVS_UNLIKELY(n_dp_flows > kill_all_limit)) {
 								            static struct vlog_rate_limit rlem = VLOG_RATE_LIMIT_INIT(1, 1);
 								            kill_them_all = true;
 								            COVERAGE_INC(upcall_flow_limit_kill);
 								            if (kill_warn_print) {
 								                kill_warn_print = false;
 								                VLOG_WARN_RL(&rlem,
 								                    "Number of datapath flows (%"PRIuSIZE") twice as high as "
 								                    "current dynamic flow limit (%"PRIuSIZE").  "
 								                    "Starting to delete flows unconditionally "
 								                    "as an emergency measure.", n_dp_flows, kill_all_limit);
 								            }
 								        }
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;
-												ofproto-dpif-upcall: Remove redundant time_msec() in revalidate().

Remove one of two consecutive time_msec() calls in the revalidate() function.

We take the time stamp after udpif_get_n_flows(), to avoid any potential
delays in getting the number of offloaded flows.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-03-09 13:30:16 +01:00
+								        udpif->dpif->current_ms = now = time_msec();
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        for (f = flows; f < &flows[n_dumped]; f++) {
 								            long long int used = f->stats.used;
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								            struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								            enum flow_del_reason del_reason = FDR_NONE;
-												netdev-offload: Implement terse dump support

In order to improve revalidator performance by minimizing unnecessary
copying of data, extend netdev-offloads to support terse dump mode. Extend
netdev_flow_api->flow_dump_create() with 'terse' bool argument. Implement
support for terse dump in functions that convert netlink to flower and
flower to match. Set flow stats "used" value based on difference in number
of flow packets because lastuse timestamp is not included in TC terse dump.

Kernel API support is implemented in following patch.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2020-06-04 13:47:00 +03:00
+								            struct dpif_flow_stats stats = f->stats;
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								            enum reval_result result;
-												revalidator: Refactor ukey creation/lookup.

This patch refactors the code around ukey creation and lookup to
simplify the code for callers. A new function ukey_acquire() combines
these functions and attempts to acquire a lock on the ukey. Failure to
acquire a lock on the ukey is usually a sign that another thread is
handling the same flow concurrently, which means the flow does not need
to be handled anyway.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-06-04 09:59:23 +00:00
+								            struct udpif_key *ukey;
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								            bool already_dumped;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            int error;
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								            if (ukey_acquire(udpif, f, &ukey, &error)) {
 								                if (error == EBUSY) {
 								                    /* Another thread is processing this flow, so don't bother
 								                     * processing it.*/
 								                    COVERAGE_INC(upcall_ukey_contention);
 								                } else {
 								                    log_unexpected_flow(f, error);
-												revalidator: Don't delete non-existent flow.

If ukey_acquire() returns ENOENT, then it is unable to locate the ukey
corresponding to the flow and the flow has disappeared since it was
dumped. Don't bother deleting the flow in this case, as it will fail.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>

											
										
										
											2014-12-10 11:20:10 -08:00
+								                    if (error != ENOENT) {
-												dpif: Shift ufid support checking up to dpif_backer.

Previously, the dpif layer was responsible for determining datapath
support for UFIDs, which resulted in all ovs-dpctl utilities
inserting/deleting flows from the datapath each time they are run.
Shift this responsibility up to the dpif_backer.

There are two users of this functionality: Revalidators check for UFID
support to request a terser dump using UFIDs, and dpif-netlink uses this
to request flow_del operations to only return the UFID/stats. The latter
case was previously hidden from revalidators, but this change makes them
aware of it, and reuses the same "udpif->enable_ufid" flag for reducing
overhead of both flow dump and flow delete.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-12-16 17:44:40 -08:00
+								                        delete_op_init__(udpif, &ops[n_ops++], f);
-												revalidator: Don't delete non-existent flow.

If ukey_acquire() returns ENOENT, then it is unable to locate the ukey
corresponding to the flow and the flow has disappeared since it was
dumped. Don't bother deleting the flow in this case, as it will fail.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>

											
										
										
											2014-12-10 11:20:10 -08:00
+								                    }
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								                }
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								                continue;
 								            }
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								            ukey->offloaded = f->attrs.offloaded;
 								            if (!ukey->dp_layer
 								                || (!dpif_synced_dp_layers(udpif->dpif)
 								                    && strcmp(ukey->dp_layer, f->attrs.dp_layer))) {
 								                if (ukey->dp_layer) {
 								                    /* The dp_layer has changed this is probably due to an
 								                     * earlier revalidate cycle moving it to/from hw offload.
 								                     * In this case we should reset the ukey stored statistics,
 								                     * as they are from the deleted DP flow. */
 								                    COVERAGE_INC(ukey_dp_change);
 								                    memset(&ukey->stats, 0, sizeof ukey->stats);
 								                }
 								                ukey->dp_layer = f->attrs.dp_layer;
 								            }
-												revalidator: Replace ukey->mark with dump_seq.

Rather than setting and resetting the 'mark' field in the ukey, this
patch introduces a seq to track whether a flow has been seen during the
most recent dump. This tidies the code and simplifies the logic for
detecting when flows are duplicated from the datapath.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-14 16:17:25 +12:00
+								            already_dumped = ukey->dump_seq == dump_seq;
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								            if (already_dumped) {
-												revalidator: Distinguish new and duplicate flows.

We previously counted flows that have been installed during the current
dump as duplicates, rather than recognising them as new flows. This
patch separates the counters out for these two cases.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-26 17:28:05 +00:00
+								                /* The flow has already been handled during this flow dump
 								                 * operation. Skip it. */
 								                if (ukey->xcache) {
 								                    COVERAGE_INC(dumped_duplicate_flow);
 								                } else {
 								                    COVERAGE_INC(dumped_new_flow);
 								                }
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								                ovs_mutex_unlock(&ukey->mutex);
 								                continue;
 								            }
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								            if (ukey->state == UKEY_INCONSISTENT) {
 								                ukey->dump_seq = dump_seq;
 								                reval_op_init(&ops[n_ops++], UKEY_DELETE, udpif, ukey,
 								                              &recircs, &odp_actions);
 								                ovs_mutex_unlock(&ukey->mutex);
 								                COVERAGE_INC(dumped_inconsistent_flow);
 								                continue;
 								            }
-												revalidator: Avoid assert in transition_ukey().

There is a case where a flow is dumped from the kernel after the ukey is
already transitioned into an EVICTING/EVICTED/DELETED state, and the
revalidator thread attempts to shift that into UKEY_OPERATIONAL because
it was able to dump the flow from the datapath. This resulted in
triggering the assert in transition_ukey(). Detect this condition and
skip handling the flow (as it's already on its way out).

Users report:
> Program terminated with signal SIGABRT, Aborted.
> raise () from /lib/x86_64-linux-gnu/libc.so.6
> raise () from /lib/x86_64-linux-gnu/libc.so.6
> abort () from /lib/x86_64-linux-gnu/libc.so.6
> ovs_abort_valist
> vlog_abort_valist
> vlog_abort
> ovs_assert_failure
> transition_ukey (ukey=<optimized out>, dst=<optimized out>)
>     at ofproto/ofproto-dpif-upcall.c:1674
> revalidate (revalidator=0x1cb36c8) at ofproto/ofproto-dpif-upcall.c:2324
> udpif_revalidator (arg=0x1cb36c8) at ofproto/ofproto-dpif-upcall.c:901
> ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:348
> start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
> clone () from /lib/x86_64-linux-gnu/libc.so.6

VMware-BZ: #1857694
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:11 -07:00
+								            if (ukey->state <= UKEY_OPERATIONAL) {
 								                /* The flow is now confirmed to be in the datapath. */
 								                transition_ukey(ukey, UKEY_OPERATIONAL);
 								            } else {
-												revalidator: Improve logging for transition_ukey().

There are a few cases where more introspection into ukey transitions
would be relevant for logging or assertion. Track the SOURCE_LOCATOR and
thread id when states are transitioned and use these for logging.

Suggested-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:12 -07:00
+								                VLOG_INFO("Unexpected ukey transition from state %d "
 								                          "(last transitioned from thread %u at %s)",
 								                          ukey->state, ukey->state_thread, ukey->state_where);
-												revalidator: Avoid assert in transition_ukey().

There is a case where a flow is dumped from the kernel after the ukey is
already transitioned into an EVICTING/EVICTED/DELETED state, and the
revalidator thread attempts to shift that into UKEY_OPERATIONAL because
it was able to dump the flow from the datapath. This resulted in
triggering the assert in transition_ukey(). Detect this condition and
skip handling the flow (as it's already on its way out).

Users report:
> Program terminated with signal SIGABRT, Aborted.
> raise () from /lib/x86_64-linux-gnu/libc.so.6
> raise () from /lib/x86_64-linux-gnu/libc.so.6
> abort () from /lib/x86_64-linux-gnu/libc.so.6
> ovs_abort_valist
> vlog_abort_valist
> vlog_abort
> ovs_assert_failure
> transition_ukey (ukey=<optimized out>, dst=<optimized out>)
>     at ofproto/ofproto-dpif-upcall.c:1674
> revalidate (revalidator=0x1cb36c8) at ofproto/ofproto-dpif-upcall.c:2324
> udpif_revalidator (arg=0x1cb36c8) at ofproto/ofproto-dpif-upcall.c:901
> ovsthread_wrapper (aux_=<optimized out>) at lib/ovs-thread.c:348
> start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
> clone () from /lib/x86_64-linux-gnu/libc.so.6

VMware-BZ: #1857694
Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-04-26 18:03:11 -07:00
+								                ovs_mutex_unlock(&ukey->mutex);
 								                continue;
 								            }
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								            if (!used) {
-												netdev-offload: Implement terse dump support

In order to improve revalidator performance by minimizing unnecessary
copying of data, extend netdev-offloads to support terse dump mode. Extend
netdev_flow_api->flow_dump_create() with 'terse' bool argument. Implement
support for terse dump in functions that convert netlink to flower and
flower to match. Set flow stats "used" value based on difference in number
of flow packets because lastuse timestamp is not included in TC terse dump.

Kernel API support is implemented in following patch.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2020-06-04 13:47:00 +03:00
+								                used = udpif_update_used(udpif, ukey, &stats);
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								            }
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								            if (kill_them_all || (used && used < now - max_idle)) {
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								                result = UKEY_DELETE;
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                del_reason = (kill_them_all) ? FDR_FLOW_LIMIT : FDR_FLOW_IDLE;
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								            } else {
-												netdev-offload: Implement terse dump support

In order to improve revalidator performance by minimizing unnecessary
copying of data, extend netdev-offloads to support terse dump mode. Extend
netdev_flow_api->flow_dump_create() with 'terse' bool argument. Implement
support for terse dump in functions that convert netlink to flower and
flower to match. Set flow stats "used" value based on difference in number
of flow packets because lastuse timestamp is not included in TC terse dump.

Kernel API support is implemented in following patch.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2020-06-04 13:47:00 +03:00
+								                result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                                         reval_seq, &recircs, &del_reason);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								            }
-												revalidator: Replace ukey->mark with dump_seq.

Rather than setting and resetting the 'mark' field in the ukey, this
patch introduces a seq to track whether a flow has been seen during the
most recent dump. This tidies the code and simplifies the logic for
detecting when flows are duplicated from the datapath.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-14 16:17:25 +12:00
+								            ukey->dump_seq = dump_seq;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												revalidator: Gather packets-per-second rate of flows

This is the second patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The packets-per-second (pps) rate for each flow is computed in the context
of revalidator threads when the flow stats are retrieved. The pps-rate is
computed only after a flow is revalidated and is not scheduled for
deletion. The parameters used to compute pps and the pps itself are saved
in udpif_key since they need to be persisted across iterations of
rebalancing.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:13 +05:30
+								            if (netdev_is_offload_rebalance_policy_enabled() &&
 								                result != UKEY_DELETE) {
 								                udpif_update_flow_pps(udpif, ukey, f);
 								            }
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								            OVS_USDT_PROBE(revalidate, flow_result, udpif, ukey, result,
 								                           del_reason);
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								            if (result != UKEY_KEEP) {
 								                /* Takes ownership of 'recircs'. */
 								                reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
 								                              &odp_actions);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								            }
-												revalidator: Eliminate duplicate flow handling.

A series of bugs have been identified recently that are caused by a
combination of the awkward flow dump API, possibility of duplicate flows
in a flow dump, and premature optimisation of the revalidator logic.
This patch attempts to simplify the revalidator logic by combining
multiple critical sections into one, which should make the state more
consistent.

The new flow of logic is:
+ Lookup the ukey.
+ If the ukey doesn't exist, create it.
+ Insert the ukey into the udpif. If we can't insert it, skip this flow.
+ Lock the ukey. If we can't lock it, skip it.
+ Determine if the ukey was already handled. If it has, skip it.
+ Revalidate.
+ Update ukey's fields (mark, flow_exists).
+ Unlock the ukey.

Previously, we would attempt to process a flow without creating a ukey if
it hadn't been dumped before and it was due to be deleted. This patch
changes this to always create a ukey, allowing the ukey's
mutex to be used as the basis for preventing a flow from being handled
twice. This improves code correctness and readability.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-05-28 15:23:42 +12:00
+								            ovs_mutex_unlock(&ukey->mutex);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        }
-												upcall: Defer ukey deletion until after pushing stats.

It is possible for a datapath to dump the same flow twice, for instance
if the flow is the last in a batch of flows to be dumped, then a new
flow is inserted into the same bucket before the flow dumper fetches
another batch.

In this case, datapath flow stats may be duplicated: The revalidator
records the stats from the first flow, using the ukey to get the stats
delta. The ukey is deleted, then the revalidator reads the second
(duplicate) flow and cannot lookup the ukey for the delta. As such, it
will push the stats as-is.

This patch reduces the likelihood of such stats duplications by
deferring ukey deletion until after stats are pushed for deleted flows.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:33 -08:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								        if (n_ops) {
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								            /* Push datapath ops but defer ukey deletion to 'sweep' phase. */
 								            push_dp_ops(udpif, ops, n_ops);
-												ofproto-dpif-upcall: Remove the flow_dumper thread.

Previously, we had a separate flow_dumper thread that fetched flows from
the datapath to distribute to revalidator threads. This patch takes the
logic for dumping and pushes it into the revalidator threads, resulting
in simpler code with similar performance to the current code.

One thread, the "leader", is responsible for beginning and ending each
flow dump, maintaining the flow_limit, and checking whether the
revalidator threads need to exit. All revalidator threads dump,
revalidate, delete datapath flows and garbage collect ukeys.

Co-authored-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-04-10 07:14:08 +00:00
+								        }
-												revalidator: Use 'cmap' for storing ukeys.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:40 +12:00
+								        ovsrcu_quiesce();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    }
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    dpif_flow_dump_thread_destroy(dump_thread);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								    ofpbuf_uninit(&odp_actions);
-												revalidator: Revalidate missed flows.

If the datapath doesn't dump a flow for some reason, and the current
dump is expected to revalidate all flows in the datapath, then perform
revalidation for those flows by fetching them during the sweep phase.
If revalidation is not required, then leave the flow in the datapath and
don't revalidate it.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-08 07:04:05 +00:00
+								}
-												ofproto-dpif-upcall: Allow main thread to pause all revalidators.

This commit adds logic using an ovs barrier to allow the main thread to
pause all revalidators.  This new feature will be used in a later patch.

Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-29 06:09:45 +00:00
+								/* Pauses the 'revalidator', can only proceed after main thread
 								 * calls udpif_resume_revalidators(). */
 								static void
 								revalidator_pause(struct revalidator *revalidator)
 								{
 								    /* The first block is for sync'ing the pause with main thread. */
 								    ovs_barrier_block(&revalidator->udpif->pause_barrier);
 								    /* The second block is for pausing until main thread resumes. */
 								    ovs_barrier_block(&revalidator->udpif->pause_barrier);
 								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								static void
-												upcall: Remove datapath flows when setting n-threads.

Previously, we would delete all ukeys when changing the number of
threads, but leave all flows in the datapath. This would cause
double-counting of stats for any flows that remain in the datapath. This
patch fixes the issue by ensuring that all flows are deleted from the
datapath before changing the number of threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:36 -08:00
+								revalidator_sweep__(struct revalidator *revalidator, bool purge)
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								{
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    struct udpif *udpif;
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    uint64_t dump_seq, reval_seq;
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    int slice;
-												upcall: Delete flows that were not recently dumped.

Previously, we would clean up the ukeys whose flow was not seen in the
most recent dump, while leaving the flow in the datapath. In the
unlikely case that the datapath fails to dump a flow that still exists
in the datapath, this would cause double-counting of those flow stats.

This is currently very rare to see due to batching of datapath flow
deletion, but is more easily observable with upcoming patches which
modify the batch size based on dpif implementation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:35 -08:00
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    udpif = revalidator->udpif;
 								    dump_seq = seq_read(udpif->dump_seq);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								    reval_seq = seq_read(udpif->reval_seq);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								    slice = revalidator - udpif->revalidators;
 								    ovs_assert(slice < udpif->n_revalidators);
 								    for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) {
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								        uint64_t odp_actions_stub[1024 / 8];
 								        struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								        struct ukey_op ops[REVALIDATE_MAX_BATCH];
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        struct udpif_key *ukey;
 								        struct umap *umap = &udpif->ukeys[i];
 								        size_t n_ops = 0;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								            enum flow_del_reason del_reason = FDR_NONE;
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            enum ukey_state ukey_state;
-												revalidator: Protect ukeys with a mutex.

Currently, udpif_keys are protected during revalidator_sweep__() as only
one thread accesses the ukey at a time. This is ensured using barriers:
all revalidators will be in the GC phase, so they will only access their
own ukey collection.

A future patch will change the access patterns to allow these ukey
collections to be read or modified while a revalidator is garbage
collecting it. To protect the ukeys, this patch adds locking on the ukey
collection.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-05 15:44:04 +12:00
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								            /* Handler threads could be holding a ukey lock while it installs a
 								             * new flow, so don't hang around waiting for access to it. */
 								            if (ovs_mutex_trylock(&ukey->mutex)) {
-												ofproto-dpif-upcall: Fix ukey installation failure logs and counters.

ukey_install() returns boolean signaling if the ukey was installed
or not.  Installation may fail for a few reasons:

 1. Conflicting ukey.
 2. Mutex contention while trying to replace existing ukey.
 3. The same ukey already exists and is active.

Only the first case here signals an actual problem.  Third one is
a little odd for userspace datapath, but harmless.  Second is the
most common one that can easily happen during normal operation
since other threads like revalidators may be currently working on
this ukey preventing an immediate access.

Since only the first case is actually worth logging and it already
has its own log message, removing the 'upcall installation fails'
warning from the upcall_cb().  This should fix most of the random
failures of userspace system tests in CI.

While at it, also fixing coverage counters.  Mutex contention was
mistakenly counted as a duplicate upcall.  ukey contention for
revalidators was counted only in one of two places.

New counter added for the ukey contention on replace.  We should
not re-use existing upcall_ukey_contention counter for this, since
it may lead to double counting.

Fixes: 67f08985d769 ("upcall: Replace ukeys for deleted flows.")
Fixes: 9cec8274ed9a ("ofproto-dpif-upcall: Add VLOG_WARN_RL logs for upcall_cb() error.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-04 14:09:37 +02:00
+								                COVERAGE_INC(upcall_ukey_contention);
-												upcall: Create ukeys in handler threads.

Currently, when a revalidator thread first dumps a flow, it creates a
'udpif_key' object and caches a copy of a kernel flow key. This allows
us to perform lookups in the classifier to attribute stats and validate
the correctness of the datapath flow.

This patch sets up this cache from the handler threads, during flow
setup. While this patch alone causes a decrease in revalidation
performance, it allows future patches to increase performance by reducing
the cost of flow dumping.

Revalidators will continue to create ukeys if a flow is dumped that has
no corresponding ukey. This may happen in corner cases such as when
ovs-vswitchd is restarted (and flows remain in the datapath) or a user
installs a flow using ovs-dpctl.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-25 13:54:24 +12:00
+								                continue;
 								            }
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            ukey_state = ukey->state;
 								            if (ukey_state == UKEY_OPERATIONAL
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								                || (ukey_state == UKEY_INCONSISTENT)
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								                || (ukey_state == UKEY_VISIBLE && purge)) {
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
 								                bool seq_mismatch = (ukey->dump_seq != dump_seq
 								                                     && ukey->reval_seq != reval_seq);
 								                enum reval_result result;
-												ofproto-dpif-upcall: Fix push_dp_ops to handle all errors.

push_dp_ops only handles delete ops errors but ignores the modify
ops results. It's better to handle all the dp operation errors in
a consistent way.

This patch prevents the inconsistency by considering modify failure
in revalidators.

To note, we cannot perform two state transitions and change ukey_state
into UKEY_EVICTED directly here, because, if we do so, the
sweep will remove the ukey alone and leave dp flow alive. Later, the
dump will retrieve the dp flow and might even recover it. This will
contribute the stats of this dp flow twice.

Signed-off-by: Peng He <hepeng.0320@bytedance.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2023-07-01 05:11:16 +00:00
+								                if (purge || ukey_state == UKEY_INCONSISTENT) {
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                    result = UKEY_DELETE;
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                    del_reason = purge ? FDR_PURGE : FDR_UPDATE_FAIL;
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                } else if (!seq_mismatch) {
 								                    result = UKEY_KEEP;
 								                } else {
 								                    struct dpif_flow_stats stats;
 								                    COVERAGE_INC(revalidate_missed_dp_flow);
-												ofproto-dpif-upcall: Use last known stats ukey stats on revalidate missed dp flows.

Instead of using all zero stats when executing a revalidate for missed
dp flows, use the last known stats to avoid odd statistics being used.

As these zero stats are stored in the ukey, the next time revalidate_ukey()
is called the delta between the new stats and the zero stats is used, which
would cause an additional increase in total packets/bytes.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Michael Santana <msantana@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-01-05 13:56:59 +01:00
+								                    memcpy(&stats, &ukey->stats, sizeof stats);
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                    result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                                             reval_seq, &recircs, &del_reason);
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                }
-												ofproto-dpif-upcall: Avoid stale ukeys leaks.

It is observed in some environments that there are many more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch tracks the number of consecutive missed dumps. If four dumps
are missed in a row, it is assumed that the datapath flow no longer
exists, and the ukey can be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2024-08-29 09:00:06 +02:00
 								                if (ukey->dump_seq != dump_seq) {
 								                    ukey->missed_dumps++;
 								                    if (ukey->missed_dumps >= 4) {
 								                        /* If the flow was not dumped for 4 revalidator rounds,
 								                         * we can assume the datapath flow no longer exists
 								                         * and the ukey should be deleted. */
 								                        COVERAGE_INC(revalidate_missing_dp_flow);
 								                        del_reason = FDR_FLOW_MISSING_DP;
 								                        result = UKEY_DELETE;
 								                    }
 								                } else {
 								                    ukey->missed_dumps = 0;
 								                }
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                if (result != UKEY_KEEP) {
 								                    /* Clears 'recircs' if filled by revalidate_ukey(). */
 								                    reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
 								                                  &odp_actions);
 								                }
-												revalidator: Add a USDT probe during flow deletion with purge reason.

During normal operations, it is useful to understand when a particular flow
gets removed from the system. This can be useful when debugging performance
issues tied to ofproto flow changes, trying to determine deployed traffic
patterns, or while debugging dynamic systems where ports come and go.

Prior to this change, there was a lack of visibility around flow expiration.
The existing debugging infrastructure could tell us when a flow was added to
the datapath, but not when it was removed or why.

This change introduces a USDT probe at the point where the revalidator
determines that the flow should be removed.  Additionally, we track the
reason for the flow eviction and provide that information as well.  With
this change, we can track the complete flow lifecycle for the netlink
datapath by hooking the upcall tracepoint in kernel, the flow put USDT, and
the revalidator USDT, letting us watch as flows are added and removed from
the kernel datapath.

This change only enables this information via USDT probe, so it won't be
possible to access this information any other way (see:
Documentation/topics/usdt-probes.rst).

Also included is a script (utilities/usdt-scripts/flow_reval_monitor.py)
which serves as a demonstration of how the new USDT probe might be used
going forward.

Co-authored-by: Aaron Conole <aconole@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Kevin Sprague <ksprague0711@gmail.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-03-05 10:44:41 -05:00
+								                OVS_USDT_PROBE(revalidator_sweep__, flow_sweep_result, udpif,
 								                               ukey, result, del_reason);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
+								            }
-												ofproto: Enable in-place modification for recirc actions.

When modifying an existing datapath flow with recirculation actions,
the references to old (if any) recirculation actions need to be freed,
and references to new recirculation actions need to be stored.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2015-11-25 15:19:37 -08:00
+								            ovs_mutex_unlock(&ukey->mutex);
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            if (ukey_state == UKEY_EVICTED) {
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								                /* The common flow deletion case involves deletion of the flow
 								                 * during the dump phase and ukey deletion here. */
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								                ovs_mutex_lock(&umap->mutex);
 								                ukey_delete(umap, ukey);
 								                ovs_mutex_unlock(&umap->mutex);
-												upcall: Delete flows that were not recently dumped.

Previously, we would clean up the ukeys whose flow was not seen in the
most recent dump, while leaving the flow in the datapath. In the
unlikely case that the datapath fails to dump a flow that still exists
in the datapath, this would cause double-counting of those flow stats.

This is currently very rare to see due to batching of datapath flow
deletion, but is more easily observable with upcoming patches which
modify the batch size based on dpif implementation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:35 -08:00
+								            }
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
 								            if (n_ops == REVALIDATE_MAX_BATCH) {
-												ofproto-dpif-upcall: Document revalidator cycle.

Add a series of comments to make it more clear what's happening for
individual ukeys being handled during revalidator dump/sweep cycle.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 16:16:25 -08:00
+								                /* Update/delete missed flows and clean up corresponding ukeys
 								                 * if necessary. */
-												ofproto-dpif-upcall: Simplify revalidator_sweep__().

Broadly, there are two cases that are handled during revalidator_sweep__:
- Ukeys which had their corresponding datapath flows deleted during the
  most recent dump phase need to be deleted.
- If a flow for a ukey still exists in the datapath, the flow may need
  to be removed or updated. This depends on a variety of factors such as
  whether the datapath is being flushed, whether individual flows were
  recently dumped, and whether those flows are valid for the current
  revalidation generation.

Previously, the logic was written such that the first of these cases
would be handled under the "UKEY_KEEP" case to ensure that
revalidator_sweep__() will not attempt to delete flows that already
exist. In this case, ukey->flow_exists would be false, which would
trigger ukey cleanup.

While correct, this is misleading and difficult to follow. Since commit
83b03fe05e7a ("ofproto-dpif-upcall: Avoid double-delete of ukeys."),
this logic is no longer required to prevent double-deletion of such
flows, so we can now make this codepath more straightforward.

Signed-off-by: Joe Stringer <joe@ovn.org>
Co-authored-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-01-07 11:47:47 -08:00
+								                push_ukey_ops(udpif, umap, ops, n_ops);
 								                n_ops = 0;
 								            }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												upcall: Delete flows that were not recently dumped.

Previously, we would clean up the ukeys whose flow was not seen in the
most recent dump, while leaving the flow in the datapath. In the
unlikely case that the datapath fails to dump a flow that still exists
in the datapath, this would cause double-counting of those flow stats.

This is currently very rare to see due to batching of datapath flow
deletion, but is more easily observable with upcoming patches which
modify the batch size based on dpif implementation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:35 -08:00
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        if (n_ops) {
-												upcall: Rename dump_op -> ukey_op.

Future patches will make use of the 'struct dump_op' in a broader sense,
so this patch renames it to make things a bit clearer.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-21 00:21:03 +12:00
+								            push_ukey_ops(udpif, umap, ops, n_ops);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        }
-												ofproto: Allow in-place modifications of datapath flows.

There are certain use cases (such as bond rebalancing) where a
datapath flow's actions may change, while its wildcard pattern
remains the same.  Before this patch, revalidators would note the
change, delete the flow, and wait for the handlers to install an
updated version.  This is inefficient, as many packets could get
punted to userspace before the new flow is finally installed.

To improve the situation, this patch implements in place modification
of datapath flows.  If the revalidators detect the only change to a
given ukey is its actions, instead of deleting it, it does a put with
the MODIFY flag set.

Signed-off-by: Ethan J. Jackson <ethan@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2015-08-03 18:43:53 -07:00
 								        ofpbuf_uninit(&odp_actions);
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								        ovsrcu_quiesce();
-												upcall: Delete flows that were not recently dumped.

Previously, we would clean up the ukeys whose flow was not seen in the
most recent dump, while leaving the flow in the datapath. In the
unlikely case that the datapath fails to dump a flow that still exists
in the datapath, this would cause double-counting of those flow stats.

This is currently very rare to see due to batching of datapath flow
deletion, but is more easily observable with upcoming patches which
modify the batch size based on dpif implementation.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:35 -08:00
+								    }
-												ofproto-dpif: Implement multi-threaded miss handling.

This patch factors flow miss handling into its own module,
ofproto-dpif-upcall which can utilize multiple threads to process
misses.  For some important benchmarks, this change improves Open
vSwitch flow setup performance by roughly 50x (that's 50 times not
50%) in my testing.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-25 14:45:43 -07:00
+								}
-												upcall: Remove datapath flows when setting n-threads.

Previously, we would delete all ukeys when changing the number of
threads, but leave all flows in the datapath. This would cause
double-counting of stats for any flows that remain in the datapath. This
patch fixes the issue by ensuring that all flows are deleted from the
datapath before changing the number of threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-11 13:55:36 -08:00
 								/* Runs revalidator_sweep__() on 'revalidator' with its second argument set to
 								 * false (presumably the non-purging mode -- confirm against the definition of
 								 * revalidator_sweep__()). */
 								static void
 								revalidator_sweep(struct revalidator *revalidator)
 								{
 								    revalidator_sweep__(revalidator, false);
 								}
 								/* Runs revalidator_sweep__() on 'revalidator' with its second argument set to
 								 * true (presumably requesting a purge -- confirm against the definition of
 								 * revalidator_sweep__()). */
 								static void
 								revalidator_purge(struct revalidator *revalidator)
 								{
 								    revalidator_sweep__(revalidator, true);
 								}
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new call-back function
in the dpif layer for notifying the upper layer of the purging of the datapath
(e.g. pmd thread deletion in dpif-netdev).  So, the
ofproto-dpif-upcall module can react properly with deleting
the ukeys and with collecting flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
 								/* In reaction to dpif purge, purges all 'ukey's with same 'pmd_id'. */
 								static void
 								dp_purge_cb(void *aux, unsigned pmd_id)
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								    OVS_NO_THREAD_SAFETY_ANALYSIS
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new call-back function
in the dpif layer for notifying the upper layer of the purging of the datapath
(e.g. pmd thread deletion in dpif-netdev).  So, the
ofproto-dpif-upcall module can react properly with deleting
the ukeys and with collecting flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								{
 								    struct udpif *udpif = aux;
 								    size_t i;
 								    udpif_pause_revalidators(udpif);
 								    for (i = 0; i < N_UMAPS; i++) {
 								        struct ukey_op ops[REVALIDATE_MAX_BATCH];
 								        struct udpif_key *ukey;
 								        struct umap *umap = &udpif->ukeys[i];
 								        size_t n_ops = 0;
 								        CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) {
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								            if (ukey->pmd_id == pmd_id) {
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new call-back function
in the dpif layer for notifying the upper layer of the purging of the datapath
(e.g. pmd thread deletion in dpif-netdev).  So, the
ofproto-dpif-upcall module can react properly with deleting
the ukeys and with collecting flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								                delete_op_init(udpif, &ops[n_ops++], ukey);
-												upcall: Track ukey states.

Ukeys have a defined lifetime that starts from being created, inserted
into the umaps, having the corresponding flow installed, then the flow
deleted, the ukey removed from the umap, rcu-deferral of its deletion,
and finally freedom.

However, until now it's all been represented behind a simple boolean
"flow_exists" with a bunch of implicit logic sprinkled around the
accessors. This patch attempts to make the ukey lifetime a bit clearer
by outlining the correct transitions and asserting that their lifetime
proceeds as expected.

This should improve the readability of the current code, and also make
the following patch easier to reason about.

Signed-off-by: Joe Stringer <joe@ovn.org>
Acked-by: Jarno Rajahalme <jarno@ovn.org>

											
										
										
											2016-08-31 11:06:04 -07:00
+								                transition_ukey(ukey, UKEY_EVICTING);
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new callback function
in the dpif layer for notifying the upper layer of datapath purging
(e.g. pmd thread deletion in dpif-netdev).  This way, the
ofproto-dpif-upcall module can react properly by deleting
the ukeys and collecting the flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								                if (n_ops == REVALIDATE_MAX_BATCH) {
 								                    push_ukey_ops(udpif, umap, ops, n_ops);
 								                    n_ops = 0;
 								                }
 								            }
 								        }
 								        if (n_ops) {
 								            push_ukey_ops(udpif, umap, ops, n_ops);
 								        }
 								        ovsrcu_quiesce();
 								    }
 								    udpif_resume_revalidators(udpif);
 								}
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
 								static void
 								upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                    const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 								{
 								    struct ds ds = DS_EMPTY_INITIALIZER;
-												dpif-netlink: Fix issues of the offloaded flows counter.

The n_offloaded_flows counter is saved in dpif, and this is the first
one when ofproto is created. When flow operation is done by ovs-appctl
commands, such as, dpctl/add-flow, a new dpif is opened, and the
n_offloaded_flows in it can't be used. So, instead of using counter,
the number of offloaded flows is queried from each netdev, then sum
them up. To achieve this, a new API is added in netdev_flow_api to get
how many flows assigned to a netdev.

In order to get better performance, this number is calculated directly
from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc
takes much time if there are many flows offloaded.

Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-12-17 02:47:32 +00:00
+								    uint64_t n_offloaded_flows;
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								    struct udpif *udpif;
 								    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        unsigned int flow_limit;
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        bool ufid_enabled;
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								        size_t i;
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								        atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								        ufid_enabled = udpif_use_ufid(udpif);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								        ds_put_format(&ds, "%s:\n", dpif_name(udpif->dpif));
-												ofproto-dpif-upcall: Remove tabs from output.

OVS uses spaces for indentation in source code and it makes sense for it to
also use spaces for indentation in output.  Spaces also consume less
horizontal space in output, which often makes it easier to read.  This
commit transitions one part of output from tabs to spaces and updates
appropriate parts of the tests to match.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2018-05-25 17:02:22 -07:00
+								        ds_put_format(&ds, "  flows         : (current %lu)"
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								            " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
 								            udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
-												dpif-netlink: Fix issues of the offloaded flows counter.

The n_offloaded_flows counter is saved in dpif, and this is the first
one when ofproto is created. When flow operation is done by ovs-appctl
commands, such as, dpctl/add-flow, a new dpif is opened, and the
n_offloaded_flows in it can't be used. So, instead of using counter,
the number of offloaded flows is queried from each netdev, then sum
them up. To achieve this, a new API is added in netdev_flow_api to get
how many flows assigned to a netdev.

In order to get better performance, this number is calculated directly
from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc
takes much time if there are many flows offloaded.

Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-12-17 02:47:32 +00:00
+								        if (!dpif_get_n_offloaded_flows(udpif->dpif, &n_offloaded_flows)) {
 								            ds_put_format(&ds, "  offloaded flows : %"PRIu64"\n",
 								                          n_offloaded_flows);
 								        }
-												ofproto-dpif-upcall: Remove tabs from output.

OVS uses spaces for indentation in source code and it makes sense for it to
also use spaces for indentation in output.  Spaces also consume less
horizontal space in output, which often makes it easier to read.  This
commit transitions one part of output from tabs to spaces and updates
appropriate parts of the tests to match.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2018-05-25 17:02:22 -07:00
+								        ds_put_format(&ds, "  dump duration : %lldms\n", udpif->dump_duration);
 								        ds_put_format(&ds, "  ufid enabled : ");
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								        if (ufid_enabled) {
 								            ds_put_format(&ds, "true\n");
 								        } else {
 								            ds_put_format(&ds, "false\n");
 								        }
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        ds_put_char(&ds, '\n');
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
-												ofproto-dpif-upcall: Fix n_revalidators on upcall show.

When upcall/show is used to collect upcall statistics from a grafana
collector or some agent, upcall/show can be called even during ovs
restart. Occasionally ovs will crash when the revalidator thread
is not really inited. Backtrace:

(gdb) bt
- 0 upcall_unixctl_show at ofproto/ofproto-dpif-upcall.c:2885
- 1 process_command at lib/unixctl.c:308
- 2 run_connection at lib/unixctl.c:342
- 3 unixctl_server_run at lib/unixctl.c:393
- 4 main at vswitchd/ovs-vswitchd.c:140

Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Signed-off-by: Wan Junjie <wanjunjie@bytedance.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-01-25 18:18:56 +08:00
+								        for (i = 0; i < udpif->n_revalidators; i++) {
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								            struct revalidator *revalidator = &udpif->revalidators[i];
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								            int j, elements = 0;
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
-												ofproto-dpif-upcall: Fix n_revalidators on upcall show.

When upcall/show is used to collect upcall statistics from a grafana
collector or some agent, upcall/show can be called even during ovs
restart. Occasionally ovs will crash when the revalidator thread
is not really inited. Backtrace:

(gdb) bt
- 0 upcall_unixctl_show at ofproto/ofproto-dpif-upcall.c:2885
- 1 process_command at lib/unixctl.c:308
- 2 run_connection at lib/unixctl.c:342
- 3 unixctl_server_run at lib/unixctl.c:393
- 4 main at vswitchd/ovs-vswitchd.c:140

Fixes: e79a6c833e0d ("ofproto: Handle flow installation and eviction in upcall.")
Signed-off-by: Wan Junjie <wanjunjie@bytedance.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-01-25 18:18:56 +08:00
+								            for (j = i; j < N_UMAPS; j += udpif->n_revalidators) {
-												udpif: Separate udpif_key maps from revalidators.

An upcoming patch will change the access patterns for ukey maps to
increase the number of writers, and shift write-access from revalidator
threads to upcall handler threads. As such, it no longer makes sense to
tie these maps to revalidators in a 1:1 relationship.

This patch separates the ukey maps from the revalidators, and increases
the number of maps used to store ukeys, to reduce contention.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-05 17:28:46 +12:00
+								                elements += cmap_count(&udpif->ukeys[j].cmap);
 								            }
-												ofproto-dpif-upcall: Remove tabs from output.

OVS uses spaces for indentation in source code and it makes sense for it to
also use spaces for indentation in output.  Spaces also consume less
horizontal space in output, which often makes it easier to read.  This
commit transitions one part of output from tabs to spaces and updates
appropriate parts of the tests to match.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>

											
										
										
											2018-05-25 17:02:22 -07:00
+								            ds_put_format(&ds, "  %u: (keys %d)\n", revalidator->id, elements);
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								        }
-												ofproto-dpif-upcall: New ovs-appctl upcall/show.

Shows debugging information related to upcall handling.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-11-20 18:06:12 -08:00
+								    }
 								    unixctl_command_reply(conn, ds_cstr(&ds));
 								    ds_destroy(&ds);
 								}
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
 								/* Disable using the megaflows.
 								 *
 								 * This command is only needed for advanced debugging, so it's not
 								 * documented in the man page. */
 								static void
 								upcall_unixctl_disable_megaflows(struct unixctl_conn *conn,
 								                                 int argc OVS_UNUSED,
 								                                 const char *argv[] OVS_UNUSED,
 								                                 void *aux OVS_UNUSED)
 								{
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_store_relaxed(&enable_megaflows, false);
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem per se, though it may be an
efficiency issue. However, for every flow_del message sent to the
datapath, a log message at the warning level will be generated in case
the datapath fails to execute the command. In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.
Only the megaflow on/off commands should flush all datapaths. This bug is
also fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								    udpif_flush_all_datapaths();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    unixctl_command_reply(conn, "megaflows disabled");
 								}
 								/* Re-enable using megaflows.
 								 *
 								 * This command is only needed for advanced debugging, so it's not
 								 * documented in the man page. */
 								static void
 								upcall_unixctl_enable_megaflows(struct unixctl_conn *conn,
 								                                int argc OVS_UNUSED,
 								                                const char *argv[] OVS_UNUSED,
 								                                void *aux OVS_UNUSED)
 								{
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								    atomic_store_relaxed(&enable_megaflows, true);
-												udpif:  Bug fix updif_flush

Before this commit, all datapath flows are cleared with dpif_flush(),
but the revalidator thread still holds ukeys, which are caches of the
datapath flows in the revalidator.  Flushing ukeys causes flow_del
messages to be sent to the datapath again on flows that have been
deleted by the dpif_flush() already.

Double deletion by itself is not a problem per se, though it may be an
efficiency issue. However, for every flow_del message sent to the
datapath, a log message at the warning level will be generated in case
the datapath fails to execute the command. In addition to causing
spurious log messages, double deletion causes unit tests to report
erroneous failures, as all warning messages are considered test failures.

The fix is to simply shut down the revalidator threads to flush all
ukeys, then flush the datapath before restarting the revalidator threads.

dpif_flush() was implemented to flush the flows of all datapaths, while
most of its invocations should only flush the local datapath.
Only the megaflow on/off commands should flush all datapaths. This bug is
also fixed.

Found during development.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-03-13 21:48:55 -07:00
+								    udpif_flush_all_datapaths();
-												ofproto: Handle flow installation and eviction in upcall.

This patch moves flow installation and eviction from ofproto-dpif and
the main thread, into ofproto-dpif-upcall.  This performs
significantly better (approximately 2x TCP_CRR improvement), and
allows ovs-vswitchd to maintain significantly larger datapath flow
tables.  On top of that, it significantly simplifies the code,
retiring "struct facet" and friends.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-09-24 13:39:56 -07:00
+								    unixctl_command_reply(conn, "megaflows enabled");
 								}
-												upcall: Add appctl call to set flow_limit.

This should assist testing of datapath performance, as it allows us to
skip "warming up" the flow limit value.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-06 09:49:19 -08:00
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								/* Disable skipping flow attributes during flow dump.
 								 *
 								 * This command is only needed for advanced debugging, so it's not
 								 * documented in the man page. */
 								static void
 								upcall_unixctl_disable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 								{
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								    atomic_store_relaxed(&enable_ufid, false);
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    unixctl_command_reply(conn, "Datapath dumping tersely using UFID disabled");
 								}
 								/* Re-enable skipping flow attributes during flow dump.
 								 *
 								 * This command is only needed for advanced debugging, so it's not documented
 								 * in the man page. */
 								static void
 								upcall_unixctl_enable_ufid(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                          const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 								{
-												upcall: Simplify enable_ufid debug option.

We previously tracked the debug enable/disable of UFID in each udpif,
and allowed the ovs-appctl debug option to turn on UFID features even if
the datapath doesn't support it.

This commit shifts the enable_ufid debug flag to a single flag, and
provides a helper to determine whether UFID features should be used on a
per-udpif basis.

Suggested-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-19 09:54:38 -08:00
+								    atomic_store_relaxed(&enable_ufid, true);
 								    unixctl_command_reply(conn, "Datapath dumping tersely using UFID enabled "
 								                                "for supported datapaths");
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								}
-												upcall: Add appctl call to set flow_limit.

This should assist testing of datapath performance, as it allows us to
skip "warming up" the flow limit value.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-06 09:49:19 -08:00
+								/* Set the flow limit.
 								 *
 								 * This command is only needed for advanced debugging, so it's not
 								 * documented in the man page. */
 								static void
 								upcall_unixctl_set_flow_limit(struct unixctl_conn *conn,
 								                              int argc OVS_UNUSED,
-												ofproto-dpif-upcall: Remove OVS_UNUSED from upcall_unixctl_set_flow_limit() arg.

The 'argv' argument is used.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>

											
										
										
											2016-12-19 14:18:25 -08:00
+								                              const char *argv[],
-												upcall: Add appctl call to set flow_limit.

This should assist testing of datapath performance, as it allows us to
skip "warming up" the flow limit value.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-06 09:49:19 -08:00
+								                              void *aux OVS_UNUSED)
 								{
 								    struct ds ds = DS_EMPTY_INITIALIZER;
 								    struct udpif *udpif;
 								    unsigned int flow_limit = atoi(argv[1]);
 								    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-												ofproto/ofproto-dpif-upcall: Use relaxed atomic operations.

Neither 'enable_megaflows', 'udpif->flow_limit', 'udpif->n_flows', nor
'udpif->n_flows_timestamp' are used to synchronize the state of any
other variables, so we can use relaxed atomic operations to access
them.

Move the atomic read operation of 'enable_megaflows' outside the loop
in handle_upcalls().

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-29 10:34:53 -07:00
+								        atomic_store_relaxed(&udpif->flow_limit, flow_limit);
-												upcall: Add appctl call to set flow_limit.

This should assist testing of datapath performance, as it allows us to
skip "warming up" the flow limit value.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-06 09:49:19 -08:00
+								    }
 								    ds_put_format(&ds, "set flow_limit to %u\n", flow_limit);
 								    unixctl_command_reply(conn, ds_cstr(&ds));
 								    ds_destroy(&ds);
 								}
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
 								static void
 								upcall_unixctl_dump_wait(struct unixctl_conn *conn,
 								                         int argc OVS_UNUSED,
 								                         const char *argv[] OVS_UNUSED,
 								                         void *aux OVS_UNUSED)
 								{
-												list: Rename all functions in list.h with ovs_ prefix.

This attempts to prevent namespace collisions with other list libraries

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:22 -07:00
+								    if (ovs_list_is_singleton(&all_udpifs)) {
-												Fix remaining "uninitialized local variable" used warning by MSVC.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-15 10:10:34 -07:00
+								        struct udpif *udpif = NULL;
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
+								        size_t len;
-												list: Rename all functions in list.h with ovs_ prefix.

This attempts to prevent namespace collisions with other list libraries

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-03-25 14:10:22 -07:00
+								        udpif = OBJECT_CONTAINING(ovs_list_front(&all_udpifs), udpif, list_node);
-												udpif: Add command to wait for revalidation.

This allows us to remove some of the sleeps from the testsuite.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-25 14:02:45 +00:00
+								        len = (udpif->n_conns + 1) * sizeof *udpif->conns;
 								        udpif->conn_seq = seq_read(udpif->dump_seq);
 								        udpif->conns = xrealloc(udpif->conns, len);
 								        udpif->conns[udpif->n_conns++] = conn;
 								    } else {
 								        unixctl_command_reply_error(conn, "can't wait on multiple udpifs.");
 								    }
 								}
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
 								static void
 								upcall_unixctl_purge(struct unixctl_conn *conn, int argc OVS_UNUSED,
 								                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 								{
 								    struct udpif *udpif;
 								    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								        bool wake_up = false;
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
+								        int n;
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								        if (!latch_is_set(&udpif->pause_latch)) {
 								            udpif_pause_revalidators(udpif);
 								            wake_up = true;
 								        }
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
+								        for (n = 0; n < udpif->n_revalidators; n++) {
 								            revalidator_purge(&udpif->revalidators[n]);
 								        }
-												ofproto-dpif-upcall: Pause revalidators when purging.

This issue has been observed when running traffic tests with a dpdk
enabled userspace datapath (though those tests are added in a separate
series).
However, the described issue also affects the kernel datapath which is
why this patch is sent separately.

A main thread executing the 'revalidator/purge' command could race with
revalidator threads that can be dumping/sweeping the purged flows at the
same time.

This race can be reproduced (with dpif debug logs) by running the
conntrack - ICMP related unit tests with the userspace datapath:

2023-10-09T14:11:55.242Z|00177|unixctl|DBG|received request
	revalidator/purge[], id=0
2023-10-09T14:11:55.242Z|00044|dpif(revalidator17)|DBG|netdev@ovs-netdev:
	flow_dump ufid:68ff6817-fb3b-4b30-8412-9cf175318294 <empty>,
	packets:0, bytes:0, used:never
2023-10-09T14:11:55.242Z|00178|dpif|DBG|netdev@ovs-netdev: flow_del
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
	recirc_id(0),dp_hash(0),skb_priority(0),in_port(2),skb_mark(0),
	ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),
	packet_type(ns=0,id=0),
	eth(src=a6:0a:bf:e2:f3:f2,dst=62:23:0f:f6:2c:75),
	eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,
	ttl=64,frag=no),udp(src=37380,dst=10000), packets:0, bytes:0,
	used:never
...
2023-10-09T14:11:55.242Z|00049|dpif(revalidator17)|WARN|netdev@ovs-netdev:
	failed to flow_get (No such file or directory)
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:11:55.242Z|00050|ofproto_dpif_upcall(revalidator17)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:07046e91-30a6-4862-9048-1a76b5a88a5b
...
2023-10-09T14:11:55.242Z|00183|unixctl|DBG|replying with success, id=0: ""

To avoid this race, a first part of the fix is to pause (if not already
paused) the revalidators while the main thread is purging the datapath
flows.

Then a second issue is observed by running the same unit test with the
kernel datapath. Its dpif implementation dumps flows via a netlink request
(see dpif_flow_dump_create(), dpif_netlink_flow_dump_create(),
nl_dump_start(), nl_sock_send__()) in the leader revalidator thread,
before pausing revalidators:

2023-10-09T14:44:28.742Z|00122|unixctl|DBG|received request
	revalidator/purge[], id=0
...
2023-10-09T14:44:28.742Z|00125|dpif|DBG|system@ovs-system: flow_del
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 recirc_id(0),dp_hash(0),
	skb_priority(0),in_port(2),skb_mark(0),ct_state(0),ct_zone(0),
	ct_mark(0),ct_label(0),eth(src=a6:0a:bf:e2:f3:f2,
	dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=10.1.1.1,
	tip=10.1.1.2,op=1,sha=a6:0a:bf:e2:f3:f2,tha=00:00:00:00:00:00),
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00129|unixctl|DBG|replying with success, id=0: ""
...
2023-10-09T14:44:28.742Z|00006|dpif(revalidator21)|DBG|system@ovs-system:
	flow_dump ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>,
	packets:0, bytes:0, used:never
...
2023-10-09T14:44:28.742Z|00012|dpif(revalidator21)|WARN|system@ovs-system:
	failed to flow_get (No such file or directory)
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9 <empty>, packets:0,
	bytes:0, used:never
2023-10-09T14:44:28.742Z|00013|ofproto_dpif_upcall(revalidator21)|WARN|
	Failed to acquire udpif_key corresponding to unexpected flow
	(No such file or directory):
	ufid:70102d81-30a1-44b9-aa76-3d02a9ffd2c9

To avoid evaluating already deleted flows, the second part of the fix is
to ensure that dumping from the leader revalidator thread is done out of
any pause request.

As a result of this patch, the unit test "offloads - delete ufid mapping
if device not exist - offloads enabled" does not need to waive the random
warning logs when purging dp flows.

Fixes: 98bb4286970d ("tests: Add command to purge revalidators of flows.")
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Simon Horman <horms@ovn.org>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-10-18 16:23:53 +02:00
+								        if (wake_up) {
 								            udpif_resume_revalidators(udpif);
 								        }
-												tests: Add command to purge revalidators of flows.

This patch adds a new 'ovs-appctl revalidator/purge' command which
flushes all flows from all datapaths, and updates the revalidator
udpif_key cache at the same time.

Update the ofproto-dpif fragment tests which may fail when ukeys are
created from handler threads.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>

											
										
										
											2014-11-13 10:42:47 -08:00
+								    }
 								    unixctl_command_reply(conn, "");
 								}
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
-												ofproto-dpif-upcall: Add debug commands to pause/resume revalidators.

New commands 'revalidator/pause' and 'revalidator/resume'.
Not documented, since these should not be used in production
environments.

Will be used for unit tests in the next commit.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-13 21:08:51 +02:00
+								static void
 								upcall_unixctl_pause(struct unixctl_conn *conn,
 								                     int argc OVS_UNUSED,
 								                     const char *argv[] OVS_UNUSED,
 								                     void *aux OVS_UNUSED)
 								{
 								    /* Debugging aid: asks every udpif on 'all_udpifs' to pause its
 								     * revalidators, then acknowledges 'conn' with an empty reply.
 								     * 'argc', 'argv' and 'aux' are unused. */
 								    struct udpif *each;
 								    LIST_FOR_EACH (each, list_node, &all_udpifs) {
 								        udpif_pause_revalidators(each);
 								    }
 								    unixctl_command_reply(conn, "");
 								}
 								/* Debugging aid: asks every udpif on 'all_udpifs' to resume its
 								 * revalidators, then acknowledges 'conn' with an empty reply.
 								 * 'argc', 'argv' and 'aux' are unused. */
 								static void
 								upcall_unixctl_resume(struct unixctl_conn *conn,
 								                      int argc OVS_UNUSED,
 								                      const char *argv[] OVS_UNUSED,
 								                      void *aux OVS_UNUSED)
 								{
 								    struct udpif *each;
 								    LIST_FOR_EACH (each, list_node, &all_udpifs) {
 								        udpif_resume_revalidators(each);
 								    }
 								    unixctl_command_reply(conn, "");
 								}
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								static void
 								upcall_unixctl_ofproto_detrace(struct unixctl_conn *conn, int argc,
 								                               const char *argv[], void *aux OVS_UNUSED)
 								{
 								    const char *key_s = argv[1];
-												ofproto: Fix default pmd_id for ofproto/detrace.

The system and netdev datapath have different default pmd_id, which
resulted in empty output of ofproto/detrace with kernel datapath.
Also indicate that the UFID or cache wasn't available.

Make sure we use the correct default pmd_id when it's not specified
as an argument. At the same time move slightly adjusted test into
system tests so it is tested with both datapaths.

Fixes: 600125b2c380 ("ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.")
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-12-05 15:50:32 +01:00
+								    const char *pmd_str = NULL;
 								    unsigned int pmd_id;
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								    ovs_u128 ufid;
 								    if (odp_ufid_from_string(key_s, &ufid) <= 0) {
 								        unixctl_command_reply_error(conn, "failed to parse ufid");
 								        return;
 								    }
 								    if (argc == 3) {
-												ofproto: Fix default pmd_id for ofproto/detrace.

The system and netdev datapath have different default pmd_id, which
resulted in empty output of ofproto/detrace with kernel datapath.
Also indicate that the UFID or cache wasn't available.

Make sure we use the correct default pmd_id when it's not specified
as an argument. At the same time move slightly adjusted test into
system tests so it is tested with both datapaths.

Fixes: 600125b2c380 ("ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.")
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-12-05 15:50:32 +01:00
+								        pmd_str = argv[2];
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								        if (!ovs_scan(pmd_str, "pmd=%d", &pmd_id)) {
 								            unixctl_command_reply_error(conn,
 								                                        "Invalid pmd argument format. "
 								                                        "Expecting 'pmd=PMD-ID'");
 								            return;
 								        }
 								    }
 								    struct ds ds = DS_EMPTY_INITIALIZER;
 								    struct udpif *udpif;
 								    LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
-												ofproto: Fix default pmd_id for ofproto/detrace.

The system and netdev datapath have different default pmd_id, which
resulted in empty output of ofproto/detrace with kernel datapath.
Also indicate that the UFID or cache wasn't available.

Make sure we use the correct default pmd_id when it's not specified
as an argument. At the same time move slightly adjusted test into
system tests so it is tested with both datapaths.

Fixes: 600125b2c380 ("ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.")
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-12-05 15:50:32 +01:00
+								        if (!pmd_str) {
 								            const char *type = dpif_normalize_type(dpif_type(udpif->dpif));
 								            pmd_id = !strcmp(type, "system") ? PMD_ID_NULL : NON_PMD_CORE_ID;
 								        }
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								        struct udpif_key *ukey = ukey_lookup(udpif, &ufid, pmd_id);
 								        if (!ukey) {
-												ofproto: Fix default pmd_id for ofproto/detrace.

The system and netdev datapath have different default pmd_id, which
resulted in empty output of ofproto/detrace with kernel datapath.
Also indicate that the UFID or cache wasn't available.

Make sure we use the correct default pmd_id when it's not specified
as an argument. At the same time move slightly adjusted test into
system tests so it is tested with both datapaths.

Fixes: 600125b2c380 ("ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.")
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-12-05 15:50:32 +01:00
+								            ds_put_format(&ds, "UFID was not found for %s\n",
 								                          dpif_name(udpif->dpif));
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UFIDs); this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								            continue;
 								        }
 								        ovs_mutex_lock(&ukey->mutex);
 								        /* It only makes sense to format rules for ukeys that are (still)
 								         * in use. */
 								        if ((ukey->state == UKEY_VISIBLE || ukey->state == UKEY_OPERATIONAL)
 								            && ukey->xcache) {
 								            xlate_xcache_format(&ds, ukey->xcache);
-												ofproto: Fix default pmd_id for ofproto/detrace.

The system and netdev datapath have different default pmd_id, which
resulted in empty output of ofproto/detrace with kernel datapath.
Also indicate that the UFID or cache wasn't available.

Make sure we use the correct default pmd_id when it's not specified
as an argument. At the same time move slightly adjusted test into
system tests so it is tested with both datapaths.

Fixes: 600125b2c380 ("ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.")
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>

											
										
										
											2024-12-05 15:50:32 +01:00
+								        } else {
 								            ds_put_format(&ds, "Cache was not found for %s\n",
 								                          dpif_name(udpif->dpif));
-												ofproto: Add ofproto/detrace command to map UFIDs to OpenFlow.

It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UFIDs); this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), ....

b. dump related OpenFlow rules and groups:
  $ ovs-appctl ofproto/detrace ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.

Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-12 15:47:55 +02:00
+								        }
 								        ovs_mutex_unlock(&ukey->mutex);
 								    }
 								    unixctl_command_reply(conn, ds_cstr(&ds));
 								    ds_destroy(&ds);
 								}
-												ofproto-dpif-upcall: Add debug commands to pause/resume revalidators.

New commands 'revalidator/pause' and 'revalidator/resume'.
Not documented, since these should not be used in production
environments.

Will be used for unit tests in the next commit.

Acked-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2022-09-13 21:08:51 +02:00
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in ascending order of
pps-rate, while pending flows are sorted in descending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are suppressed by the TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								/* Flows are sorted in the following order:
 								 * netdev, flow state (offloaded/kernel path), flow_pps_rate.
 								 */
 								static int
 								flow_compare_rebalance(const void *elem1, const void *elem2)
 								{
 								    /* qsort() passes pointers to the array slots, and each slot holds a
 								     * 'struct udpif_key *'. */
 								    const struct udpif_key *f1 = *(struct udpif_key **)elem1;
 								    const struct udpif_key *f2 = *(struct udpif_key **)elem2;
 								    /* Primary key: the input netdev, compared by pointer value so that
 								     * all flows of one netdev end up adjacent. */
 								    if (f1->in_netdev < f2->in_netdev) {
 								        return -1;
 								    } else if (f1->in_netdev > f2->in_netdev) {
 								        return 1;
 								    }
 								    /* Secondary key: offloaded flows sort ahead of pending ones. */
 								    if (f1->offloaded != f2->offloaded) {
 								        return f2->offloaded - f1->offloaded;
 								    }
 								    /* Tertiary key: pps rate, ascending for offloaded flows and descending
 								     * for pending ones.  Flows with equal rates compare equal, because
 								     * qsort() needs a consistent ordering; the rates are compared directly
 								     * rather than through the sign of their difference, which could wrap
 								     * around. */
 								    if (f1->flow_pps_rate == f2->flow_pps_rate) {
 								        return 0;
 								    }
 								    if (f1->offloaded) {
 								        return f1->flow_pps_rate < f2->flow_pps_rate ? -1 : 1;
 								    }
 								    return f1->flow_pps_rate > f2->flow_pps_rate ? -1 : 1;
 								}
 								/* Offloads flows from 'pending_flows' (holding 'pending_count' entries) to
 								 * their netdev during rebalancing.
 								 *
 								 * Once 'insert_count' flows have been offloaded, the walk stops at the
 								 * first flow whose pps rate is below 'rate_threshold'.  It also stops as
 								 * soon as an offload attempt fails with ENOSPC.  Flows that fail with any
 								 * other error are skipped.
 								 *
 								 * Returns the number of flows that were offloaded. */
 								static int
 								rebalance_insert_pending(struct udpif *udpif, struct udpif_key **pending_flows,
 								                         int pending_count, int insert_count,
 								                         uint64_t rate_threshold)
 								{
 								    int n_offloaded = 0;
 								    int idx = 0;
 								    while (idx < pending_count) {
 								        struct udpif_key *ukey = pending_flows[idx++];
 								        /* Quota reached and this flow is slower than the threshold:
 								         * nothing further is worth offloading. */
 								        if (n_offloaded >= insert_count
 								            && ukey->flow_pps_rate < rate_threshold) {
 								            break;
 								        }
 								        /* Try to place the flow on the netdev. */
 								        int rc = udpif_flow_program(udpif, ukey, DPIF_OFFLOAD_ALWAYS);
 								        if (rc == ENOSPC) {
 								            /* The device is out of resources; give up. */
 								            break;
 								        }
 								        if (!rc) {
 								            /* The flow now lives on the netdev, so drop the kernel
 								             * datapath copy, then update the flow state and counters. */
 								            udpif_flow_unprogram(udpif, ukey, DPIF_OFFLOAD_NEVER);
 								            ukey->offloaded = true;
 								            udpif_set_ukey_backlog_packets(ukey);
 								            n_offloaded++;
 								        }
 								    }
 								    return n_offloaded;
 								}
 								/* Moves the first 'offload_count' flows of 'offloaded_flows' from the
 								 * netdev back to the kernel path during rebalancing.  A flow that cannot
 								 * be moved is left marked as offloaded. */
 								static void
 								rebalance_remove_offloaded(struct udpif *udpif,
 								                           struct udpif_key **offloaded_flows,
 								                           int offload_count)
 								{
 								    int idx;
 								    for (idx = 0; idx < offload_count; idx++) {
 								        struct udpif_key *ukey = offloaded_flows[idx];
 								        /* The kernel path copy goes in first. */
 								        if (udpif_flow_program(udpif, ukey, DPIF_OFFLOAD_NEVER)) {
 								            continue;
 								        }
 								        /* Only then is the offloaded copy taken off the netdev. */
 								        if (udpif_flow_unprogram(udpif, ukey, DPIF_OFFLOAD_ALWAYS)) {
 								            /* Removal failed: undo the kernel path install made above. */
 								            udpif_flow_unprogram(udpif, ukey, DPIF_OFFLOAD_NEVER);
 								        } else {
 								            udpif_set_ukey_backlog_packets(ukey);
 								            ukey->offloaded = false;
 								        }
 								    }
 								}
 								/*
 								 * Rebalance offloaded flows on a netdev that's in OOR state.
 								 *
 								 * The rebalancing is done in two phases. In the first phase, we check if
 								 * the pending flows can be offloaded (if some resources became available
 								 * in the meantime) by trying to offload each pending flow. If all pending
 								 * flows get successfully offloaded, the OOR state is cleared on the netdev
 								 * and there's nothing to rebalance.
 								 *
 								 * If some of the pending flows could not be offloaded, i.e., we still see
 								 * the OOR error, then we move to the second phase of rebalancing. In this
 								 * phase, the rebalancer compares the pps-rate of the offloaded flow that
 								 * has the lowest pps-rate with that of the pending flow that has the
 								 * highest pps-rate, taken from their respective sorted arrays. If the
 								 * pps-rate of the offloaded flow is less than the pps-rate of the pending
 								 * flow, then it deletes the offloaded flow from the HW/netdev, adds it to
 								 * the kernel datapath and then offloads the pending flow to the HW/netdev.
 								 * This process is repeated for every pair of offloaded and pending flows
 								 * in the ordered lists. The process stops when we encounter an offloaded
 								 * flow that has a pps-rate at least as high as that of the corresponding
 								 * pending flow. The entire rebalancing process is repeated in the next
 								 * iteration.
 								 *
 								 * 'offloaded_flows' and 'pending_flows' hold 'offload_count' and
 								 * 'pending_count' entries respectively.
 								 *
 								 * Returns false if every pending flow was offloaded in phase 1, otherwise
 								 * true.
 								 */
 								static bool
 								rebalance_device(struct udpif *udpif, struct udpif_key **offloaded_flows,
 								                 int offload_count, struct udpif_key **pending_flows,
 								                 int pending_count)
 								{
 								    /* Phase 1: try to offload every pending flow.  The insert quota equals
 								     * 'pending_count', so the rate threshold is never consulted; pass 0. */
 								    int num_inserted = rebalance_insert_pending(udpif, pending_flows,
 								                                                pending_count, pending_count,
 								                                                0);
 								    if (num_inserted) {
 								        VLOG_DBG("Offload rebalance: Phase1: inserted %d pending flows",
 								                  num_inserted);
 								    }
 								    /* Adjust pending array */
 								    pending_flows = &pending_flows[num_inserted];
 								    pending_count -= num_inserted;
 								    if (!pending_count) {
 								        /*
 								         * Successfully offloaded all pending flows. The device
 								         * is no longer in OOR state; done rebalancing this device.
 								         */
 								        return false;
 								    }
 								    /*
 								     * Phase 2; determine how many offloaded flows to churn.
 								     */
 								#define	OFFL_REBAL_MAX_CHURN    1024
 								    int churn_count = 0;
 								    while (churn_count < OFFL_REBAL_MAX_CHURN && churn_count < offload_count
 								           && churn_count < pending_count) {
 								        /* Pair up the i-th pending flow with the i-th offloaded flow and
 								         * stop at the first pair where swapping would not be a gain. */
 								        if (pending_flows[churn_count]->flow_pps_rate <=
 								            offloaded_flows[churn_count]->flow_pps_rate) {
 								            break;
 								        }
 								        churn_count++;
 								    }
 								    if (churn_count) {
 								        VLOG_DBG("Offload rebalance: Phase2: removing %d offloaded flows",
 								                  churn_count);
 								    }
 								    /* Bail early if nothing to churn */
 								    if (!churn_count) {
 								        return true;
 								    }
 								    /* Remove offloaded flows */
 								    rebalance_remove_offloaded(udpif, offloaded_flows, churn_count);
 								    /* Adjust offloaded array */
 								    offloaded_flows = &offloaded_flows[churn_count];
 								    offload_count -= churn_count;
 								    /* Replace offloaded flows with pending flows.  Beyond 'churn_count'
 								     * insertions, keep going only while a pending flow is at least as fast
 								     * as the slowest flow still offloaded; with no offloaded flows left
 								     * the threshold is 0, which never stops the insertion. */
 								    num_inserted = rebalance_insert_pending(udpif, pending_flows,
 								                                            pending_count, churn_count,
 								                                            offload_count ?
 								                                            offloaded_flows[0]->flow_pps_rate :
 								                                            0);
 								    if (num_inserted) {
 								        VLOG_DBG("Offload rebalance: Phase2: inserted %d pending flows",
 								                  num_inserted);
 								    }
 								    return true;
 								}
 								/* Appends 'ukey' to the 'sort_flows' array, which currently holds
 								 * '*total_flow_count' entries out of '*alloc_flow_count' allocated slots.
 								 * When the array is full it is reallocated through x2nrealloc(), which is
 								 * expected to update '*alloc_flow_count'.
 								 *
 								 * Returns the (possibly moved) array; callers must use the return value
 								 * instead of the pointer they passed in. */
 								static struct udpif_key **
 								udpif_add_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
 								                    size_t *alloc_flow_count, struct udpif_key *ukey)
 								{
 								    bool full = *total_flow_count >= *alloc_flow_count;
 								    if (full) {
 								        sort_flows = x2nrealloc(sort_flows, alloc_flow_count,
 								                                sizeof *sort_flows);
 								    }
 								    size_t slot = *total_flow_count;
 								    sort_flows[slot] = ukey;
 								    *total_flow_count = slot + 1;
 								    return sort_flows;
 								}
 								/*
 								 * Adds 'ukey' to sort_flows[] if its input port is a netdev that is
 								 * currently in 'OOR' state, and bumps that netdev's offloaded or pending
 								 * flow counter accordingly.
 								 *
 								 * '*oor_netdev_count' is incremented when the first offloaded flow of a
 								 * netdev is seen.  A ukey without an input netdev is ignored; a ukey whose
 								 * input netdev is not OOR is passed to ukey_netdev_unref() and otherwise
 								 * ignored.
 								 *
 								 * Returns the (possibly reallocated) sort_flows[] array.
 								 */
 								static struct udpif_key **
 								udpif_build_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count,
 								                      size_t *alloc_flow_count, struct udpif_key *ukey,
 								                      int *oor_netdev_count)
 								{
 								    struct netdev *in_dev = ukey->in_netdev;
 								    /* Nothing to do for flows without an input netdev. */
 								    if (!in_dev) {
 								        return sort_flows;
 								    }
 								    /* Flows on netdevs that are not OOR take no part in rebalancing. */
 								    if (!netdev_get_hw_info(in_dev, HW_INFO_TYPE_OOR)) {
 								        ukey_netdev_unref(ukey);
 								        return sort_flows;
 								    }
 								    /* Record the flow for sorting. */
 								    sort_flows = udpif_add_oor_flows(sort_flows, total_flow_count,
 								                                      alloc_flow_count, ukey);
 								    if (ukey->offloaded) {
 								        int n_offloaded = netdev_get_hw_info(in_dev,
 								                                             HW_INFO_TYPE_OFFL_COUNT);
 								        ovs_assert(n_offloaded >= 0);
 								        /* First offloaded flow seen on this netdev: count the netdev. */
 								        if (n_offloaded == 0) {
 								            (*oor_netdev_count)++;
 								        }
 								        netdev_set_hw_info(in_dev, HW_INFO_TYPE_OFFL_COUNT,
 								                           n_offloaded + 1);
 								    } else {
 								        int n_pending = netdev_get_hw_info(in_dev,
 								                                           HW_INFO_TYPE_PEND_COUNT);
 								        ovs_assert(n_pending >= 0);
 								        netdev_set_hw_info(in_dev, HW_INFO_TYPE_PEND_COUNT, n_pending + 1);
 								    }
 								    return sort_flows;
 								}
 								/*
 								 * Rebalance offloaded flows on HW netdevs that are in OOR state.
 								 *
 								 * Walks every ukey of 'udpif', collects those whose input netdev is OOR,
 								 * sorts them (see flow_compare_rebalance()) and then runs
 								 * rebalance_device() once per counted netdev.  The OOR flag of each
 								 * processed netdev is set to the result of rebalance_device() and its
 								 * per-netdev flow counters are reset to 0.
 								 */
 								static void
 								udpif_flow_rebalance(struct udpif *udpif)
 								{
 								    struct udpif_key **sort_flows = NULL;
 								    size_t alloc_flow_count = 0;
 								    size_t total_flow_count = 0;
 								    int oor_netdev_count = 0;
 								    int offload_index = 0;
 								    int pending_index;
 								    /* Collect flows (offloaded and pending) that reference OOR netdevs */
 								    for (size_t i = 0; i < N_UMAPS; i++) {
 								        struct udpif_key *ukey;
 								        struct umap *umap = &udpif->ukeys[i];
 								        CMAP_FOR_EACH (ukey, cmap_node, &umap->cmap) {
 								            ukey_to_flow_netdev(udpif, ukey);
 								            sort_flows = udpif_build_oor_flows(sort_flows, &total_flow_count,
 								                                               &alloc_flow_count, ukey,
 								                                               &oor_netdev_count);
 								        }
 								    }
 								    /* Sort flows by OOR netdevs, state (offloaded/pending) and pps-rate.
 								     * 'sort_flows' is still NULL when nothing was collected, and qsort()
 								     * must not be handed a null array. */
 								    if (total_flow_count) {
 								        qsort(sort_flows, total_flow_count, sizeof(struct udpif_key *),
 								              flow_compare_rebalance);
 								    }
 								    /*
 								     * We now have flows referencing OOR netdevs, that are sorted. We also
 								     * have a count of offloaded and pending flows on each of the netdevs
 								     * that are in OOR state. Now rebalance each oor-netdev.
 								     *
 								     * NOTE(review): udpif_build_oor_flows() only counts a netdev in
 								     * 'oor_netdev_count' when it has at least one offloaded flow, while
 								     * flows of OOR netdevs with only pending flows are still present in
 								     * sort_flows[].  Confirm that such netdevs cannot occur here.
 								     */
 								    while (oor_netdev_count) {
 								        struct netdev *netdev;
 								        int offload_count;
 								        int pending_count;
 								        bool oor;
 								        /* Each netdev's flows are contiguous: offloaded ones first,
 								         * immediately followed by the pending ones. */
 								        netdev = sort_flows[offload_index]->in_netdev;
 								        ovs_assert(netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR) == true);
 								        VLOG_DBG("Offload rebalance: netdev: %s is OOR", netdev->name);
 								        offload_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT);
 								        pending_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT);
 								        pending_index = offload_index + offload_count;
 								        oor = rebalance_device(udpif,
 								                               &sort_flows[offload_index], offload_count,
 								                               &sort_flows[pending_index], pending_count);
 								        netdev_set_hw_info(netdev, HW_INFO_TYPE_OOR, oor);
 								        /* Advance to the next netdev's flows and reset the counters. */
 								        offload_index = pending_index + pending_count;
 								        netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, 0);
 								        netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, 0);
 								        oor_netdev_count--;
 								    }
 								    /* Call ukey_netdev_unref() on every collected flow.  The index is a
 								     * size_t to match the type of 'total_flow_count'. */
 								    for (size_t i = 0; i < total_flow_count; i++) {
 								        ukey_netdev_unref(sort_flows[i]);
 								    }
 								    free(sort_flows);
 								}
 								static int
 								udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey,
 								                   enum dpif_offload_type offload_type)
 								{
 								    struct dpif_op *opsp;
 								    struct ukey_op uop;
 								    opsp = &uop.dop;
 								    put_op_init(&uop, ukey, DPIF_FP_CREATE);
 								    dpif_operate(udpif->dpif, &opsp, 1, offload_type);
 								    return opsp->error;
 								}
 								static int
 								udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey,
 								                     enum dpif_offload_type offload_type)
 								{
 								    struct dpif_op *opsp;
 								    struct ukey_op uop;
 								    opsp = &uop.dop;
 								    delete_op_init(udpif, &uop, ukey);
 								    dpif_operate(udpif->dpif, &opsp, 1, offload_type);
 								    return opsp->error;
 								}