ovs/lib/dpif.h

/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * dpif, the DataPath InterFace.
 *
 * In Open vSwitch terminology, a "datapath" is a flow-based software switch.
 * A datapath has no intelligence of its own.  Rather, it relies entirely on
 * its client to set up flows.  The datapath layer is core to the Open vSwitch
 * software switch: one could say, without much exaggeration, that everything
 * in ovs-vswitchd above dpif exists only to make the correct decisions
 * interacting with dpif.
 *
 * Typically, the client of a datapath is the software switch module in
 * "ovs-vswitchd", but other clients can be written.  The "ovs-dpctl" utility
 * is also a (simple) client.
 *
 *
 * Overview
 * ========
 *
 * The terms written in quotes below are defined in later sections.
 *
 * When a datapath "port" receives a packet, it extracts the headers (the
 * "flow").  If the datapath's "flow table" contains a "flow entry" matching
 * the packet, then it executes the "actions" in the flow entry and increments
 * the flow's statistics.  If there is no matching flow entry, the datapath
 * instead appends the packet to an "upcall" queue.
 *
 *
 * Ports
 * =====
 *
 * A datapath has a set of ports that are analogous to the ports on an Ethernet
 * switch.  At the datapath level, each port has the following information
 * associated with it:
 *
 *    - A name, a short string that must be unique within the host.  This is
 *      typically a name that would be familiar to the system administrator,
 *      e.g. "eth0" or "vif1.1", but it is otherwise arbitrary.
 *
 *    - A 32-bit port number that must be unique within the datapath but is
 *      otherwise arbitrary.  The port number is the most important identifier
 *      for a port in the datapath interface.
 *
 *    - A type, a short string that identifies the kind of port.  On a Linux
 *      host, typical types are "system" (for a network device such as eth0),
 *      "internal" (for a simulated port used to connect to the TCP/IP stack),
 *      and "gre" (for a GRE tunnel).
 *
 *    - A Netlink PID for each upcall reading thread (see "Upcall Queuing and
 *      Ordering" below).
 *
 * The dpif interface has functions for adding and deleting ports.  When a
 * datapath implements these (e.g. as the Linux and netdev datapaths do), then
 * Open vSwitch's ovs-vswitchd daemon can directly control what ports are used
 * for switching.  Some datapaths might not implement them, or implement them
 * with restrictions on the types of ports that can be added or removed,
 * on systems where port membership can only be changed by some external
 * entity.
 *
 * Each datapath must have a port, sometimes called the "local port", whose
 * name is the same as the datapath itself, with port number 0.  The local port
 * cannot be deleted.
 *
 * Ports are available as "struct netdev"s.  To obtain a "struct netdev *" for
 * a port named 'name' with type 'port_type', in a datapath of type
 * 'datapath_type', call netdev_open(name, dpif_port_open_type(datapath_type,
 * port_type).  The netdev can be used to get and set important data related to
 * the port, such as:
 *
 *    - MTU (netdev_get_mtu(), netdev_set_mtu()).
 *
 *    - Ethernet address (netdev_get_etheraddr(), netdev_set_etheraddr()).
 *
 *    - Statistics such as the number of packets and bytes transmitted and
 *      received (netdev_get_stats()).
 *
 *    - Carrier status (netdev_get_carrier()).
 *
 *    - Link features (netdev_get_features()).
 *
 *    - Speed (netdev_get_speed()).
 *
 *    - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and
 *      related functions.)
 *
 *    - Arbitrary port-specific configuration parameters (netdev_get_config(),
 *      netdev_set_config()).  An example of such a parameter is the IP
 *      endpoint for a GRE tunnel.
 *
 *
 * Flow Table
 * ==========
 *
 * The flow table is a collection of "flow entries".  Each flow entry contains:
 *
 *    - A "flow", that is, a summary of the headers in an Ethernet packet.  The
 *      flow must be unique within the flow table.  Flows are fine-grained
 *      entities that include L2, L3, and L4 headers.  A single TCP connection
 *      consists of two flows, one in each direction.
 *
 *      In Open vSwitch userspace, "struct flow" is the typical way to describe
 *      a flow, but the datapath interface uses a different data format to
 *      allow ABI forward- and backward-compatibility.  Refer to OVS_KEY_ATTR_*
 *      and "struct ovs_key_*" in include/odp-netlink.h for details.
 *      lib/odp-util.h defines several functions for working with these flows.
 *
 *    - A "mask" that, for each bit in the flow, specifies whether the datapath
 *      should consider the corresponding flow bit when deciding whether a
 *      given packet matches the flow entry.  The original datapath design did
 *      not support matching: every flow entry was exact match.  With the
 *      addition of a mask, the interface supports datapaths with a spectrum of
 *      wildcard matching capabilities, from those that only support exact
 *      matches to those that support bitwise wildcarding on the entire flow
 *      key, as well as datapaths with capabilities somewhere in between.
 *
 *      Datapaths do not provide a way to query their wildcarding capabilities,
 *      nor is it expected that the client should attempt to probe for the
 *      details of their support.  Instead, a client installs flows with masks
 *      that wildcard as many bits as acceptable.  The datapath then actually
 *      wildcards as many of those bits as it can and changes the wildcard bits
 *      that it does not support into exact match bits.  A datapath that can
 *      wildcard any bit, for example, would install the supplied mask, an
 *      exact-match only datapath would install an exact-match mask regardless
 *      of what mask the client supplied, and a datapath in the middle of the
 *      spectrum would selectively change some wildcard bits into exact match
 *      bits.
 *
 *      Regardless of the requested or installed mask, the datapath retains the
 *      original flow supplied by the client.  (It does not, for example, "zero
 *      out" the wildcarded bits.)  This allows the client to unambiguously
 *      identify the flow entry in later flow table operations.
 *
 *      The flow table does not have priorities; that is, all flow entries have
 *      equal priority.  Detecting overlapping flow entries is expensive in
 *      general, so the datapath is not required to do it.  It is primarily the
 *      client's responsibility not to install flow entries whose flow and mask
 *      combinations overlap.
 *
 *    - A list of "actions" that tell the datapath what to do with packets
 *      within a flow.  Some examples of actions are OVS_ACTION_ATTR_OUTPUT,
 *      which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which
 *      modifies packet headers.  Refer to OVS_ACTION_ATTR_* and "struct
 *      ovs_action_*" in include/odp-netlink.h for details.  lib/odp-util.h
 *      defines several functions for working with datapath actions.
 *
 *      The actions list may be empty.  This indicates that nothing should be
 *      done to matching packets, that is, they should be dropped.
 *
 *      (In case you are familiar with OpenFlow, datapath actions are analogous
 *      to OpenFlow actions.)
 *
 *    - Statistics: the number of packets and bytes that the flow has
 *      processed, the last time that the flow processed a packet, and the
 *      union of all the TCP flags in packets processed by the flow.  (The
 *      latter is 0 if the flow is not a TCP flow.)
 *
 * The datapath's client manages the flow table, primarily in reaction to
 * "upcalls" (see below).
 *
 *
 * Upcalls
 * =======
 *
 * A datapath sometimes needs to notify its client that a packet was received.
 * The datapath mechanism to do this is called an "upcall".
 *
 * Upcalls are used in two situations:
 *
 *    - When a packet is received, but there is no matching flow entry in its
 *      flow table (a flow table "miss"), this causes an upcall of type
 *      DPIF_UC_MISS.  These are called "miss" upcalls.
 *
 *    - A datapath action of type OVS_ACTION_ATTR_USERSPACE causes an upcall of
 *      type DPIF_UC_ACTION.  These are called "action" upcalls.
 *
 * An upcall contains an entire packet.  There is no attempt to, e.g., copy
 * only as much of the packet as normally needed to make a forwarding decision.
 * Such an optimization is doable, but experimental prototypes showed it to be
 * of little benefit because an upcall typically contains the first packet of a
 * flow, which is usually short (e.g. a TCP SYN).  Also, the entire packet can
 * sometimes really be needed.
 *
 * After a client reads a given upcall, the datapath is finished with it, that
 * is, the datapath doesn't maintain any lingering state past that point.
 *
 * The latency from the time that a packet arrives at a port to the time that
 * it is received from dpif_recv() is critical in some benchmarks.  For
 * example, if this latency is 1 ms, then a netperf TCP_CRR test, which opens
 * and closes TCP connections one at a time as quickly as it can, cannot
 * possibly achieve more than 500 transactions per second, since every
 * connection consists of two flows with 1-ms latency to set up each one.
 *
 * To receive upcalls, a client has to enable them with dpif_recv_set().  A
 * datapath should generally support being opened multiple times (e.g. so that
 * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd"
 * is also running) but need not support more than one of these clients
 * enabling upcalls at once.
 *
 *
 * Upcall Queuing and Ordering
 * ---------------------------
 *
 * The datapath's client reads upcalls one at a time by calling dpif_recv().
 * When more than one upcall is pending, the order in which the datapath
 * presents upcalls to its client is important.  The datapath's client does not
 * directly control this order, so the datapath implementer must take care
 * during design.
 *
 * The minimal behavior, suitable for initial testing of a datapath
 * implementation, is that all upcalls are appended to a single queue, which is
 * delivered to the client in order.
 *
 * The datapath should ensure that a high rate of upcalls from one particular
 * port cannot cause upcalls from other sources to be dropped or unreasonably
 * delayed.  Otherwise, one port conducting a port scan or otherwise initiating
 * high-rate traffic spanning many flows could suppress other traffic.
 * Ideally, the datapath should present upcalls from each port in a "round
 * robin" manner, to ensure fairness.
 *
 * The client has no control over "miss" upcalls and no insight into the
 * datapath's implementation, so the datapath is entirely responsible for
 * queuing and delivering them.  On the other hand, the datapath has
 * considerable freedom of implementation.  One good approach is to maintain a
 * separate queue for each port, to prevent any given port's upcalls from
 * interfering with other ports' upcalls.  If this is impractical, then another
 * reasonable choice is to maintain some fixed number of queues and assign each
 * port to one of them.  Ports assigned to the same queue can then interfere
 * with each other, but not with ports assigned to different queues.  Other
 * approaches are also possible.
 *
 * The client has some control over "action" upcalls: it can specify a 32-bit
 * "Netlink PID" as part of the action.  This terminology comes from the Linux
 * datapath implementation, which uses a protocol called Netlink in which a PID
 * designates a particular socket and the upcall data is delivered to the
 * socket's receive queue.  Generically, though, a Netlink PID identifies a
 * queue for upcalls.  The basic requirements on the datapath are:
 *
 *    - The datapath must provide a Netlink PID associated with each port.  The
 *      client can retrieve the PID with dpif_port_get_pid().
 *
 *    - The datapath must provide a "special" Netlink PID not associated with
 *      any port.  dpif_port_get_pid() also provides this PID.  (ovs-vswitchd
 *      uses this PID to queue special packets that must not be lost even if a
 *      port is otherwise busy, such as packets used for tunnel monitoring.)
 *
 * The minimal behavior of dpif_port_get_pid() and the treatment of the Netlink
 * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value
 * and all upcalls are appended to a single queue.
 *
 * The preferred behavior is:
 *
 *    - Each port has a PID that identifies the queue used for "miss" upcalls
 *      on that port.  (Thus, if each port has its own queue for "miss"
 *      upcalls, then each port has a different Netlink PID.)
 *
 *    - "miss" upcalls for a given port and "action" upcalls that specify that
 *      port's Netlink PID add their upcalls to the same queue.  The upcalls
 *      are delivered to the datapath's client in the order that the packets
 *      were received, regardless of whether the upcalls are "miss" or "action"
 *      upcalls.
 *
 *    - Upcalls that specify the "special" Netlink PID are queued separately.
 *
 *
 * Packet Format
 * =============
 *
 * The datapath interface works with packets in a particular form.  This is the
 * form taken by packets received via upcalls (i.e. by dpif_recv()).  Packets
 * supplied to the datapath for processing (i.e. to dpif_execute()) also take
 * this form.
 *
 * A VLAN tag is represented by an 802.1Q header.  If the layer below the
 * datapath interface uses another representation, then the datapath interface
 * must perform conversion.
 *
 * The datapath interface requires all packets to fit within the MTU.  Some
 * operating systems internally process packets larger than MTU, with features
 * such as TSO and UFO.  When such a packet passes through the datapath
 * interface, it must be broken into multiple MTU or smaller sized packets for
 * presentation as upcalls.  (This does not happen often, because an upcall
 * typically contains the first packet of a flow, which is usually short.)
 *
 * Some operating system TCP/IP stacks maintain packets in an unchecksummed or
 * partially checksummed state until transmission.  The datapath interface
 * requires all host-generated packets to be fully checksummed (e.g. IP and TCP
 * checksums must be correct).  On such an OS, the datapath interface must fill
 * in these checksums.
 *
 * Packets passed through the datapath interface must be at least 14 bytes
 * long, that is, they must have a complete Ethernet header.  They are not
 * required to be padded to the minimum Ethernet length.
 *
 *
 * Typical Usage
 * =============
 *
 * Typically, the client of a datapath begins by configuring the datapath with
 * a set of ports.  Afterward, the client runs in a loop polling for upcalls to
 * arrive.
 *
 * For each upcall received, the client examines the enclosed packet and
 * figures out what should be done with it.  For example, if the client
 * implements a MAC-learning switch, then it searches the forwarding database
 * for the packet's destination MAC and VLAN and determines the set of ports to
 * which it should be sent.  In any case, the client composes a set of datapath
 * actions to properly dispatch the packet and then directs the datapath to
 * execute those actions on the packet (e.g. with dpif_execute()).
 *
 * Most of the time, the actions that the client executed on the packet apply
 * to every packet with the same flow.  For example, the flow includes both
 * destination MAC and VLAN ID (and much more), so this is true for the
 * MAC-learning switch example above.  In such a case, the client can also
 * direct the datapath to treat any further packets in the flow in the same
 * way, using dpif_flow_put() to add a new flow entry.
 *
 * Other tasks the client might need to perform, in addition to reacting to
 * upcalls, include:
 *
 *    - Periodically polling flow statistics, perhaps to supply to its own
 *      clients.
 *
 *    - Deleting flow entries from the datapath that haven't been used
 *      recently, to save memory.
 *
 *    - Updating flow entries whose actions should change.  For example, if a
 *      MAC learning switch learns that a MAC has moved, then it must update
 *      the actions of flow entries that sent packets to the MAC at its old
 *      location.
 *
 *    - Adding and removing ports to achieve a new configuration.
 *
 *
 * Thread-safety
 * =============
 *
 * Most of the dpif functions are fully thread-safe: they may be called from
 * any number of threads on the same or different dpif objects.  The exceptions
 * are:
 *
 *    - dpif_port_poll() and dpif_port_poll_wait() are conditionally
 *      thread-safe: they may be called from different threads only on
 *      different dpif objects.
 *
 *    - dpif_flow_dump_next() is conditionally thread-safe: It may be called
 *      from different threads with the same 'struct dpif_flow_dump', but all
 *      other parameters must be different for each thread.
 *
 *    - dpif_flow_dump_done() is conditionally thread-safe: All threads that
 *      share the same 'struct dpif_flow_dump' must have finished using it.
 *      This function must then be called exactly once for a particular
 *      dpif_flow_dump to finish the corresponding flow dump operation.
 *
 *    - Functions that operate on 'struct dpif_port_dump' are conditionally
 *      thread-safe with respect to those objects.  That is, one may dump ports
 *      from any number of threads at once, but each thread must use its own
 *      struct dpif_port_dump.
 */
#ifndef DPIF_H
#define DPIF_H 1

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "dpdk.h"
#include "dp-packet.h"
#include "netdev.h"
#include "openflow/openflow.h"
#include "openvswitch/ofp-meter.h"
#include "ovs-numa.h"
#include "packets.h"
#include "util.h"

#ifdef  __cplusplus
extern "C" {
#endif

struct dpif;
struct dpif_class;
struct dpif_flow;
struct ds;
struct flow;
struct flow_wildcards;
struct nlattr;
struct sset;

int dp_register_provider(const struct dpif_class *);
int dp_unregister_provider(const char *type);
void dp_disallow_provider(const char *type);
void dp_enumerate_types(struct sset *types);
const char *dpif_normalize_type(const char *);

int dp_enumerate_names(const char *type, struct sset *names);
void dp_parse_name(const char *datapath_name, char **name, char **type);

int dpif_open(const char *name, const char *type, struct dpif **);
int dpif_create(const char *name, const char *type, struct dpif **);
int dpif_create_and_open(const char *name, const char *type, struct dpif **);
void dpif_close(struct dpif *);

bool dpif_run(struct dpif *);
void dpif_wait(struct dpif *);

const char *dpif_name(const struct dpif *);
const char *dpif_base_name(const struct dpif *);
const char *dpif_type(const struct dpif *);

bool dpif_cleanup_required(const struct dpif *);

int dpif_delete(struct dpif *);

/* Statistics for a dpif as a whole. */
struct dpif_dp_stats {
    uint64_t n_hit;             /* Number of flow table matches. */
    uint64_t n_missed;          /* Number of flow table misses. */
    uint64_t n_lost;            /* Number of misses not sent to userspace. */
    uint64_t n_flows;           /* Number of flows present. */
    uint64_t n_cache_hit;       /* Number of mega flow mask cache hits for
                                   flow table matches. */
    uint64_t n_mask_hit;        /* Number of mega flow masks visited for
                                   flow table matches. */
    uint32_t n_masks;           /* Number of mega flow masks. */
};
int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);

int dpif_set_features(struct dpif *, uint32_t new_features);

int dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows);


/* Port operations. */

const char *dpif_port_open_type(const char *datapath_type,
                                const char *port_type);
int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop);
int dpif_port_del(struct dpif *, odp_port_t port_no, bool local_delete);

/* A port within a datapath.
 *
 * 'name' and 'type' are suitable for passing to netdev_open(). */
struct dpif_port {
    char *name;                 /* Network device name, e.g. "eth0". */
    char *type;                 /* Network device type, e.g. "system". */
    odp_port_t port_no;         /* Port number within datapath. */
};
void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
void dpif_port_destroy(struct dpif_port *);
bool dpif_port_exists(const struct dpif *dpif, const char *devname);
int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no,
                              struct dpif_port *, bool warn_if_not_found);
int dpif_port_query_by_name(const struct dpif *, const char *devname,
                            struct dpif_port *);
int dpif_port_get_name(struct dpif *, odp_port_t port_no,
                       char *name, size_t name_size);
uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);

struct dpif_port_dump {
    const struct dpif *dpif;
    int error;
    void *state;
};
void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *);
bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *);
int dpif_port_dump_done(struct dpif_port_dump *);

/* Iterates through each DPIF_PORT in DPIF, using DUMP as state.
 *
 * Arguments all have pointer type.
 *
 * If you break out of the loop, then you need to free the dump structure by
 * hand using dpif_port_dump_done(). */
#define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF)   \
    for (dpif_port_dump_start(DUMP, DPIF);          \
         (dpif_port_dump_next(DUMP, DPIF_PORT)      \
          ? true                                    \
          : (dpif_port_dump_done(DUMP), false));    \
        )

int dpif_port_poll(const struct dpif *, char **devnamep);
void dpif_port_poll_wait(const struct dpif *);

/* Flow table operations. */

struct dpif_flow_stats {
    uint64_t n_packets;
    uint64_t n_bytes;
    long long int used;
    uint16_t tcp_flags;
};

/* more statistics info for offloaded packets and bytes */
struct dpif_flow_detailed_stats {
    uint64_t n_packets;
    uint64_t n_bytes;
    /* n_offload_packets are a subset of n_packets */
    uint64_t n_offload_packets;
    /* n_offload_bytes are a subset of n_bytes */
    uint64_t n_offload_bytes;
    long long int used;
    uint16_t tcp_flags;
};

struct dpif_flow_attrs {
    bool offloaded;            /* True if flow is offloaded to HW. */
    const char *dp_layer;      /* DP layer the flow is handled in. */
    const char *dp_extra_info; /* Extra information provided by DP. */
};

struct dpif_flow_dump_types {
    bool ovs_flows;
    bool netdev_flows;
};

void dpif_flow_stats_extract(const struct flow *, const struct dp_packet *packet,
                             long long int used, struct dpif_flow_stats *);
void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *);

enum dpif_flow_put_flags {
    DPIF_FP_CREATE = 1 << 0,    /* Allow creating a new flow. */
    DPIF_FP_MODIFY = 1 << 1,    /* Allow modifying an existing flow. */
    DPIF_FP_ZERO_STATS = 1 << 2, /* Zero the stats of an existing flow. */
    DPIF_FP_PROBE = 1 << 3      /* Suppress error messages, if any. */
};

bool dpif_probe_feature(struct dpif *, const char *name,
                        const struct ofpbuf *key, const struct ofpbuf *actions,
                        const ovs_u128 *ufid);
int dpif_flow_flush(struct dpif *);
int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags,
                  const struct nlattr *key, size_t key_len,
                  const struct nlattr *mask, size_t mask_len,
                  const struct nlattr *actions, size_t actions_len,
                  const ovs_u128 *ufid, const unsigned pmd_id,
                  struct dpif_flow_stats *);
int dpif_flow_del(struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  const ovs_u128 *ufid, const unsigned pmd_id,
                  struct dpif_flow_stats *);
int dpif_flow_get(struct dpif *,
                  const struct nlattr *key, size_t key_len,
                  const ovs_u128 *ufid, const unsigned pmd_id,
                  struct ofpbuf *, struct dpif_flow *);

/* Flow dumping interface
 * ======================
 *
 * This interface allows iteration through all of the flows currently installed
 * in a datapath.  It is somewhat complicated by two requirements:
 *
 *    - Efficient support for dumping flows in parallel from multiple threads.
 *
 *    - Allow callers to avoid making unnecessary copies of data returned by
 *      the interface across several flows in cases where the dpif
 *      implementation has to maintain a copy of that information anyhow.
 *      (That is, allow the client visibility into any underlying batching as
 *      part of its own batching.)
 *
 *
 * Usage
 * -----
 *
 * 1. Call dpif_flow_dump_create().
 * 2. In each thread that participates in the dump (which may be just a single
 *    thread if parallelism isn't important):
 *        (a) Call dpif_flow_dump_thread_create().
 *        (b) Call dpif_flow_dump_next() repeatedly until it returns 0.
 *        (c) Call dpif_flow_dump_thread_destroy().
 * 3. Call dpif_flow_dump_destroy().
 *
 * All error reporting is deferred to the call to dpif_flow_dump_destroy().
 */
struct dpif_flow_dump *dpif_flow_dump_create(const struct dpif *, bool terse,
                                             struct dpif_flow_dump_types *);
int dpif_flow_dump_destroy(struct dpif_flow_dump *);

struct dpif_flow_dump_thread *dpif_flow_dump_thread_create(
    struct dpif_flow_dump *);
void dpif_flow_dump_thread_destroy(struct dpif_flow_dump_thread *);

#define PMD_ID_NULL OVS_CORE_UNSPEC

/* A datapath flow as dumped by dpif_flow_dump_next(). */
struct dpif_flow {
    const struct nlattr *key;     /* Flow key, as OVS_KEY_ATTR_* attrs. */
    size_t key_len;               /* 'key' length in bytes. */
    const struct nlattr *mask;    /* Flow mask, as OVS_KEY_ATTR_* attrs. */
    size_t mask_len;              /* 'mask' length in bytes. */
    const struct nlattr *actions; /* Actions, as OVS_ACTION_ATTR_ */
    size_t actions_len;           /* 'actions' length in bytes. */
    ovs_u128 ufid;                /* Unique flow identifier. */
    bool ufid_present;            /* True if 'ufid' was provided by datapath.*/
    unsigned pmd_id;              /* Datapath poll mode driver id. */
    struct dpif_flow_stats stats; /* Flow statistics. */
    struct dpif_flow_attrs attrs; /* Flow attributes. */
};
int dpif_flow_dump_next(struct dpif_flow_dump_thread *,
                        struct dpif_flow *flows, int max_flows);

#define DPIF_FLOW_BUFSIZE 2048

/* Operation batching interface.
 *
 * Some datapaths are faster at performing N operations together than the same
 * N operations individually, hence an interface for batching.
 */

enum dpif_op_type {
    DPIF_OP_FLOW_PUT = 1,
    DPIF_OP_FLOW_DEL,
    DPIF_OP_EXECUTE,
    DPIF_OP_FLOW_GET,
};

/* offload_type argument types to (*operate) interface */
enum dpif_offload_type {
    DPIF_OFFLOAD_AUTO,         /* Offload if possible, fallback to software. */
    DPIF_OFFLOAD_NEVER,        /* Never offload to hardware. */
    DPIF_OFFLOAD_ALWAYS,       /* Always offload to hardware. */
};

/* Add or modify a flow.
 *
 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
 * the 'key_len' bytes starting at 'key'.  The associated actions are specified
 * by the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len'
 * bytes starting at 'actions'.
 *
 *   - If the flow's key does not exist in the dpif, then the flow will be
 *     added if 'flags' includes DPIF_FP_CREATE.  Otherwise the operation will
 *     fail with ENOENT.
 *
 *     If the operation succeeds, then 'stats', if nonnull, will be zeroed.
 *
 *   - If the flow's key does exist in the dpif, then the flow's actions will
 *     be updated if 'flags' includes DPIF_FP_MODIFY.  Otherwise the operation
 *     will fail with EEXIST.  If the flow's actions are updated, then its
 *     statistics will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and
 *     left as-is otherwise.
 *
 *     If the operation succeeds, then 'stats', if nonnull, will be set to the
 *     flow's statistics before the update.
 *
 *   - If the datapath implements multiple pmd thread with its own flow
 *     table, 'pmd_id' should be used to specify the particular polling
 *     thread for the operation. PMD_ID_NULL means that the flow should
 *     be put on all the polling threads.
 */
struct dpif_flow_put {
    /* Input. */
    enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
    const struct nlattr *key;       /* Flow to put. */
    size_t key_len;                 /* Length of 'key' in bytes. */
    const struct nlattr *mask;      /* Mask to put. */
    size_t mask_len;                /* Length of 'mask' in bytes. */
    const struct nlattr *actions;   /* Actions to perform on flow. */
    size_t actions_len;             /* Length of 'actions' in bytes. */
    const ovs_u128 *ufid;           /* Optional unique flow identifier. */
    unsigned pmd_id;                /* Datapath poll mode driver id. */

    /* Output. */
    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
};

/* Delete a flow.
 *
 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
 * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If
 * the flow was created using 'ufid', then 'ufid' must be specified to delete
 * the flow. If both are specified, 'key' will be ignored for flow deletion.
 * Succeeds with status 0 if the flow is deleted, or fails with ENOENT if the
 * dpif does not contain such a flow.
 *
 * Callers should always provide the 'key' to improve dpif logging in the event
 * of errors or unexpected behaviour.
 *
 * If the datapath implements multiple polling thread with its own flow table,
 * 'pmd_id' should be used to specify the particular polling thread for the
 * operation. PMD_ID_NULL means that the flow should be deleted from all the
 * polling threads.
 *
 * If the operation succeeds, then 'stats', if nonnull, will be set to the
 * flow's statistics before its deletion. */
struct dpif_flow_del {
    /* Input. */
    const struct nlattr *key;       /* Flow to delete. */
    size_t key_len;                 /* Length of 'key' in bytes. */
    const ovs_u128 *ufid;           /* Unique identifier of flow to delete. */
    bool terse;                     /* OK to skip sending/receiving full flow
                                     * info? */
    unsigned pmd_id;                /* Datapath poll mode driver id. */

    /* Output. */
    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
};

/* Executes actions on a specified packet.
 *
 * Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
 * frame in 'packet' and on the packet metadata in 'md'.  May modify both
 * 'packet' and 'md'.
 *
 * Some dpif providers do not implement every action.  The Linux kernel
 * datapath, in particular, does not implement ARP field modification.  If
 * 'needs_help' is true, the dpif layer executes in userspace all of the
 * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
 * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif
 * implementation.
 *
 * This works even if 'actions_len' is too long for a Netlink attribute. */
struct dpif_execute {
    /* Input. */
    const struct nlattr *actions;   /* Actions to execute on packet. */
    size_t actions_len;             /* Length of 'actions' in bytes. */
    bool needs_help;
    bool probe;                     /* Suppress error messages. */
    unsigned int mtu;               /* Maximum transmission unit to fragment.
                                       0 if not a fragmented packet */
    uint64_t hash;                  /* Packet flow hash. 0 if not specified. */
    const struct flow *flow;         /* Flow extracted from 'packet'. */

    /* Input, but possibly modified as a side effect of execution. */
    struct dp_packet *packet;          /* Packet to execute. */
};

/* Queries the dpif for a flow entry.
 *
 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
 * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If
 * the flow was created using 'ufid', then 'ufid' must be specified to fetch
 * the flow. If both are specified, 'key' will be ignored for the flow query.
 * 'buffer' must point to an initialized buffer, with a recommended size of
 * DPIF_FLOW_BUFSIZE bytes.
 *
 * On success, 'flow' will be populated with the mask, actions, stats and attrs
 * for the datapath flow corresponding to 'key'. The mask and actions may point
 * within '*buffer', or may point at RCU-protected data. Therefore, callers
 * that wish to hold these over quiescent periods must make a copy of these
 * fields before quiescing.
 *
 * Callers should always provide 'key' to improve dpif logging in the event of
 * errors or unexpected behaviour.
 *
 * If the datapath implements multiple polling thread with its own flow table,
 * 'pmd_id' should be used to specify the particular polling thread for the
 * operation. PMD_ID_NULL means that the datapath will return the first
 * matching flow from any poll thread.
 *
 * Succeeds with status 0 if the flow is fetched, or fails with ENOENT if no
 * such flow exists. Other failures are indicated with a positive errno value.
 */
struct dpif_flow_get {
    /* Input. */
    const struct nlattr *key;       /* Flow to get. */
    size_t key_len;                 /* Length of 'key' in bytes. */
    const ovs_u128 *ufid;           /* Unique identifier of flow to get. */
    unsigned pmd_id;                /* Datapath poll mode driver id. */
    struct ofpbuf *buffer;          /* Storage for output parameters. */

    /* Output. */
    struct dpif_flow *flow;         /* Resulting flow from datapath. */
};

int dpif_execute(struct dpif *, struct dpif_execute *);

struct dpif_op {
    enum dpif_op_type type;
    int error;
    union {
        struct dpif_flow_put flow_put;
        struct dpif_flow_del flow_del;
        struct dpif_execute execute;
        struct dpif_flow_get flow_get;
    };
};

void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops,
                  enum dpif_offload_type);

/* Queries the datapath for hardware offloads stats.
 *
 * Statistics are written in 'stats' following the 'netdev_custom_stats'
 * format. They are allocated on the heap and must be freed by the caller,
 * using 'netdev_free_custom_stats_counters'.
 */
int dpif_offload_stats_get(struct dpif *dpif,
                           struct netdev_custom_stats *stats);

/* Upcalls. */

enum dpif_upcall_type {
    DPIF_UC_MISS,               /* Miss in flow table. */
    DPIF_UC_ACTION,             /* OVS_ACTION_ATTR_USERSPACE action. */
    DPIF_N_UC_TYPES
};

const char *dpif_upcall_type_to_string(enum dpif_upcall_type);

/* A packet passed up from the datapath to userspace.
 *
 * The 'packet', 'key' and 'userdata' may point into data in a buffer
 * provided by the caller, so the buffer should be released only after the
 * upcall processing has been finished.
 *
 * While being processed, the 'packet' may be reallocated, so the packet must
 * be separately released with ofpbuf_uninit().
 */
struct dpif_upcall {
    /* All types. */
    struct dp_packet packet;    /* Packet data,'dp_packet' should be the first
                                   member to avoid a hole. This is because
                                   'rte_mbuf' in dp_packet is aligned atleast
                                   on a 64-byte boundary */
    enum dpif_upcall_type type;
    struct nlattr *key;         /* Flow key. */
    size_t key_len;             /* Length of 'key' in bytes. */
    ovs_u128 ufid;              /* Unique flow identifier for 'key'. */
    struct nlattr *mru;         /* Maximum receive unit. */
    struct nlattr *hash;        /* Packet hash. */
    struct nlattr *cutlen;      /* Number of bytes shrink from the end. */

    /* DPIF_UC_ACTION only. */
    struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
    struct nlattr *out_tun_key;    /* Output tunnel key. */
    struct nlattr *actions;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
};

/* A callback to notify higher layer of dpif about to be purged, so that
 * higher layer could try reacting to this (e.g. grabbing all flow stats
 * before they are gone).  This function is currently implemented only by
 * dpif-netdev.
 *
 * The caller needs to provide the 'aux' pointer passed down by higher
 * layer from the dpif_register_notify_cb() function and the 'pmd_id' of
 * the polling thread.
 */
    typedef void dp_purge_callback(void *aux, unsigned pmd_id);

void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);

/* A callback to process an upcall, currently implemented only by dpif-netdev.
 *
 * The caller provides the 'packet' and 'flow' to process, the corresponding
 * 'ufid' as generated by odp_flow_key_hash(), the polling thread id 'pmd_id',
 * the 'type' of the upcall, and if 'type' is DPIF_UC_ACTION then the
 * 'userdata' attached to the action.
 *
 * The callback must fill in 'actions' with the datapath actions to apply to
 * 'packet'.  'wc' and 'put_actions' will either be both null or both nonnull.
 * If they are nonnull, then the caller will install a flow entry to process
 * all future packets that match 'flow' and 'wc'; the callback must store a
 * wildcard mask suitable for that purpose into 'wc'.  If the actions to store
 * into the flow entry are the same as 'actions', then the callback may leave
 * 'put_actions' empty; otherwise it must store the desired actions into
 * 'put_actions'.
 *
 * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
 * flow should be installed, or some otherwise a positive errno value. */
typedef int upcall_callback(const struct dp_packet *packet,
                            const struct flow *flow,
                            ovs_u128 *ufid,
                            unsigned pmd_id,
                            enum dpif_upcall_type type,
                            const struct nlattr *userdata,
                            struct ofpbuf *actions,
                            struct flow_wildcards *wc,
                            struct ofpbuf *put_actions,
                            void *aux);

void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);

int dpif_recv_set(struct dpif *, bool enable);
int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
bool dpif_number_handlers_required(struct dpif *, uint32_t *n_handlers);
int dpif_set_config(struct dpif *, const struct smap *cfg);
int dpif_port_set_config(struct dpif *, odp_port_t, const struct smap *cfg);
int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
              struct ofpbuf *);
void dpif_recv_purge(struct dpif *);
void dpif_recv_wait(struct dpif *, uint32_t handler_id);
void dpif_enable_upcall(struct dpif *);
void dpif_disable_upcall(struct dpif *);

void dpif_print_packet(struct dpif *, struct dpif_upcall *);

/* Meters. */
void dpif_meter_get_features(const struct dpif *,
                             struct ofputil_meter_features *);
int dpif_meter_set(struct dpif *, ofproto_meter_id meter_id,
                   struct ofputil_meter_config *);
int dpif_meter_get(const struct dpif *, ofproto_meter_id meter_id,
                   struct ofputil_meter_stats *, uint16_t n_bands);
int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id,
                   struct ofputil_meter_stats *, uint16_t n_bands);

/* Bonding. */

/* Bit-mask for hashing a flow down to a bucket. */
#define BOND_MASK 0xff
#define BOND_BUCKETS (BOND_MASK + 1)

int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *member_map);
int dpif_bond_del(struct dpif *, uint32_t bond_id);
int dpif_bond_stats_get(struct dpif *, uint32_t bond_id, uint64_t *n_bytes);
bool dpif_supports_lb_output_action(const struct dpif *);


/* Cache */
int dpif_cache_get_supported_levels(struct dpif *dpif, uint32_t *levels);
int dpif_cache_get_name(struct dpif *dpif, uint32_t level, const char **name);
int dpif_cache_get_size(struct dpif *dpif, uint32_t level, uint32_t *size);
int dpif_cache_set_size(struct dpif *dpif, uint32_t level, uint32_t size);


/* Miscellaneous. */

void dpif_get_netflow_ids(const struct dpif *,
                          uint8_t *engine_type, uint8_t *engine_id);

int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id,
                           uint32_t *priority);

int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no,
                           unsigned int **pmds, size_t *n);

char *dpif_get_dp_version(const struct dpif *);
bool dpif_supports_tnl_push_pop(const struct dpif *);
bool dpif_may_support_explicit_drop_action(const struct dpif *);
bool dpif_may_support_psample(const struct dpif *);
bool dpif_synced_dp_layers(struct dpif *);

/* Log functions. */
struct vlog_module;

void log_flow_message(const struct dpif *dpif, int error,
                      const struct vlog_module *module,
                      const char *operation,
                      const struct nlattr *key, size_t key_len,
                      const struct nlattr *mask, size_t mask_len,
                      const ovs_u128 *ufid,
                      const struct dpif_flow_stats *stats,
                      const struct nlattr *actions, size_t actions_len);
void log_flow_put_message(const struct dpif *,
                          const struct vlog_module *,
                          const struct dpif_flow_put *,
                          int error);
void log_flow_del_message(const struct dpif *,
                          const struct vlog_module *,
                          const struct dpif_flow_del *,
                          int error);
void log_execute_message(const struct dpif *,
                         const struct vlog_module *,
                         const struct dpif_execute *,
                         bool subexecute, int error);
void log_flow_get_message(const struct dpif *,
                          const struct vlog_module *,
                          const struct dpif_flow_get *,
                          int error);
#ifdef  __cplusplus
}
#endif

#endif /* dpif.h */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/*
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 */
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								/*
 								 * dpif, the DataPath InterFace.
 								 *
 								 * In Open vSwitch terminology, a "datapath" is a flow-based software switch.
 								 * A datapath has no intelligence of its own.  Rather, it relies entirely on
 								 * its client to set up flows.  The datapath layer is core to the Open vSwitch
 								 * software switch: one could say, without much exaggeration, that everything
 								 * in ovs-vswitchd above dpif exists only to make the correct decisions
 								 * interacting with dpif.
 								 *
 								 * Typically, the client of a datapath is the software switch module in
 								 * "ovs-vswitchd", but other clients can be written.  The "ovs-dpctl" utility
 								 * is also a (simple) client.
 								 *
 								 *
 								 * Overview
 								 * ========
 								 *
 								 * The terms written in quotes below are defined in later sections.
 								 *
 								 * When a datapath "port" receives a packet, it extracts the headers (the
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for my keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about it's headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								 * "flow").  If the datapath's "flow table" contains a "flow entry" matching
 								 * the packet, then it executes the "actions" in the flow entry and increments
 								 * the flow's statistics.  If there is no matching flow entry, the datapath
 								 * instead appends the packet to an "upcall" queue.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *
 								 * Ports
 								 * =====
 								 *
 								 * A datapath has a set of ports that are analogous to the ports on an Ethernet
 								 * switch.  At the datapath level, each port has the following information
 								 * associated with it:
 								 *
 								 *    - A name, a short string that must be unique within the host.  This is
 								 *      typically a name that would be familiar to the system administrator,
 								 *      e.g. "eth0" or "vif1.1", but it is otherwise arbitrary.
 								 *
 								 *    - A 32-bit port number that must be unique within the datapath but is
 								 *      otherwise arbitrary.  The port number is the most important identifier
 								 *      for a port in the datapath interface.
 								 *
 								 *    - A type, a short string that identifies the kind of port.  On a Linux
 								 *      host, typical types are "system" (for a network device such as eth0),
 								 *      "internal" (for a simulated port used to connect to the TCP/IP stack),
 								 *      and "gre" (for a GRE tunnel).
 								 *
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								 *    - A Netlink PID for each upcall reading thread (see "Upcall Queuing and
 								 *      Ordering" below).
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 * The dpif interface has functions for adding and deleting ports.  When a
 								 * datapath implements these (e.g. as the Linux and netdev datapaths do), then
 								 * Open vSwitch's ovs-vswitchd daemon can directly control what ports are used
 								 * for switching.  Some datapaths might not implement them, or implement them
-												Remove ESX references.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-05-30 18:05:48 -07:00
+								 * with restrictions on the types of ports that can be added or removed,
 								 * on systems where port membership can only be changed by some external
 								 * entity.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 * Each datapath must have a port, sometimes called the "local port", whose
 								 * name is the same as the datapath itself, with port number 0.  The local port
 								 * cannot be deleted.
 								 *
 								 * Ports are available as "struct netdev"s.  To obtain a "struct netdev *" for
 								 * a port named 'name' with type 'port_type', in a datapath of type
 								 * 'datapath_type', call netdev_open(name, dpif_port_open_type(datapath_type,
 								 * port_type).  The netdev can be used to get and set important data related to
 								 * the port, such as:
 								 *
 								 *    - MTU (netdev_get_mtu(), netdev_set_mtu()).
 								 *
 								 *    - Ethernet address (netdev_get_etheraddr(), netdev_set_etheraddr()).
 								 *
 								 *    - Statistics such as the number of packets and bytes transmitted and
 								 *      received (netdev_get_stats()).
 								 *
 								 *    - Carrier status (netdev_get_carrier()).
 								 *
-												netdev: Add netdev_get_speed() to netdev API.

Currently, the netdev's speed is being calculated by taking the link's
feature bits (using netdev_get_features()) and transforming them into
bps.

This mechanism can be both inaccurate and difficult to maintain, mainly
because we currently use the feature bits supported by OpenFlow which
would have to be extended to support all new feature bits of all netdev
implementations while keeping the OpenFlow API intact.

In order to expose the link speed accurately for all current and future
hardware, add a new netdev API call that allows the implementations to
provide the current and maximum link speeds in Mbps.

Internally, the logic to get the maximum supported speed still relies on
feature bits so it might still get out of sync in the future. However,
the maximum configurable speed is not used as much as the current speed
and these feature bits are not exposed through the netdev interface so
it should be easier to add more.

Use this new function instead of netdev_get_features() where the link
speed is needed.

As a consequence of this patch, link speeds of cards is properly
reported (internally in OVSDB) even if not supported by OpenFlow.
A test verifies this behavior using a tap device.

Also, in order to avoid using the old, this patch adds a checkpatch.py
warning if the old API is used.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137567
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-07-17 10:08:11 +02:00
+								 *    - Link features (netdev_get_features()).
 								 *
 								 *    - Speed (netdev_get_speed()).
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *    - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and
 								 *      related functions.)
 								 *
 								 *    - Arbitrary port-specific configuration parameters (netdev_get_config(),
 								 *      netdev_set_config()).  An example of such a parameter is the IP
 								 *      endpoint for a GRE tunnel.
 								 *
 								 *
 								 * Flow Table
 								 * ==========
 								 *
-												dpif: Document datapath masking.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2013-11-12 17:10:16 -08:00
+								 * The flow table is a collection of "flow entries".  Each flow entry contains:
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *    - A "flow", that is, a summary of the headers in an Ethernet packet.  The
-												dpif: Document datapath masking.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2013-11-12 17:10:16 -08:00
+								 *      flow must be unique within the flow table.  Flows are fine-grained
 								 *      entities that include L2, L3, and L4 headers.  A single TCP connection
 								 *      consists of two flows, one in each direction.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *      In Open vSwitch userspace, "struct flow" is the typical way to describe
 								 *      a flow, but the datapath interface uses a different data format to
-												doc: Populate 'topics' section

There are many docs that don't need to kept at the top level, along
with many more hidden in random folders. Move them all.

This also allows us to add the '-W' flag to Sphinx, ensuring unindexed
docs result in build failures.

Signed-off-by: Stephen Finucane <stephen@that.guru>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-12-08 12:55:26 +00:00
+								 *      allow ABI forward- and backward-compatibility.  Refer to OVS_KEY_ATTR_*
 								 *      and "struct ovs_key_*" in include/odp-netlink.h for details.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *      lib/odp-util.h defines several functions for working with these flows.
 								 *
-												dpif: Document datapath masking.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2013-11-12 17:10:16 -08:00
+								 *    - A "mask" that, for each bit in the flow, specifies whether the datapath
 								 *      should consider the corresponding flow bit when deciding whether a
 								 *      given packet matches the flow entry.  The original datapath design did
 								 *      not support matching: every flow entry was exact match.  With the
 								 *      addition of a mask, the interface supports datapaths with a spectrum of
 								 *      wildcard matching capabilities, from those that only support exact
 								 *      matches to those that support bitwise wildcarding on the entire flow
 								 *      key, as well as datapaths with capabilities somewhere in between.
 								 *
 								 *      Datapaths do not provide a way to query their wildcarding capabilities,
 								 *      nor is it expected that the client should attempt to probe for the
 								 *      details of their support.  Instead, a client installs flows with masks
 								 *      that wildcard as many bits as acceptable.  The datapath then actually
 								 *      wildcards as many of those bits as it can and changes the wildcard bits
 								 *      that it does not support into exact match bits.  A datapath that can
 								 *      wildcard any bit, for example, would install the supplied mask, an
 								 *      exact-match only datapath would install an exact-match mask regardless
 								 *      of what mask the client supplied, and a datapath in the middle of the
 								 *      spectrum would selectively change some wildcard bits into exact match
 								 *      bits.
 								 *
 								 *      Regardless of the requested or installed mask, the datapath retains the
 								 *      original flow supplied by the client.  (It does not, for example, "zero
 								 *      out" the wildcarded bits.)  This allows the client to unambiguously
 								 *      identify the flow entry in later flow table operations.
 								 *
 								 *      The flow table does not have priorities; that is, all flow entries have
 								 *      equal priority.  Detecting overlapping flow entries is expensive in
 								 *      general, so the datapath is not required to do it.  It is primarily the
 								 *      client's responsibility not to install flow entries whose flow and mask
 								 *      combinations overlap.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *    - A list of "actions" that tell the datapath what to do with packets
 								 *      within a flow.  Some examples of actions are OVS_ACTION_ATTR_OUTPUT,
 								 *      which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which
 								 *      modifies packet headers.  Refer to OVS_ACTION_ATTR_* and "struct
-												Do not seemingly #include Linux-specific headers on other platforms.

Until now, the OVS source tree has had a whole maze of header files that
make "#include <linux/openvswitch.h>" work OK regardless of platform, but
this confuses everyone new to the tree, at first glance, and is difficult
to understand at second glance too.

This commit renames include/linux/openvswitch.h to
datapath/linux/compat/include/linux/openvswitch.h without other change,
then modifies the userspace build to generate a header that makes sense in
portable Open vSwitch userspace from that header.

It then removes all the remaining include/linux/* files since they are now
unused.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2014-08-04 11:11:40 -07:00
+								 *      ovs_action_*" in include/odp-netlink.h for details.  lib/odp-util.h
 								 *      defines several functions for working with datapath actions.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *      The actions list may be empty.  This indicates that nothing should be
 								 *      done to matching packets, that is, they should be dropped.
 								 *
 								 *      (In case you are familiar with OpenFlow, datapath actions are analogous
 								 *      to OpenFlow actions.)
 								 *
 								 *    - Statistics: the number of packets and bytes that the flow has
 								 *      processed, the last time that the flow processed a packet, and the
 								 *      union of all the TCP flags in packets processed by the flow.  (The
 								 *      latter is 0 if the flow is not a TCP flow.)
 								 *
 								 * The datapath's client manages the flow table, primarily in reaction to
 								 * "upcalls" (see below).
 								 *
 								 *
 								 * Upcalls
 								 * =======
 								 *
 								 * A datapath sometimes needs to notify its client that a packet was received.
 								 * The datapath mechanism to do this is called an "upcall".
 								 *
 								 * Upcalls are used in two situations:
 								 *
 								 *    - When a packet is received, but there is no matching flow entry in its
 								 *      flow table (a flow table "miss"), this causes an upcall of type
 								 *      DPIF_UC_MISS.  These are called "miss" upcalls.
 								 *
 								 *    - A datapath action of type OVS_ACTION_ATTR_USERSPACE causes an upcall of
 								 *      type DPIF_UC_ACTION.  These are called "action" upcalls.
 								 *
 								 * An upcall contains an entire packet.  There is no attempt to, e.g., copy
 								 * only as much of the packet as normally needed to make a forwarding decision.
 								 * Such an optimization is doable, but experimental prototypes showed it to be
 								 * of little benefit because an upcall typically contains the first packet of a
 								 * flow, which is usually short (e.g. a TCP SYN).  Also, the entire packet can
 								 * sometimes really be needed.
 								 *
 								 * After a client reads a given upcall, the datapath is finished with it, that
 								 * is, the datapath doesn't maintain any lingering state past that point.
 								 *
 								 * The latency from the time that a packet arrives at a port to the time that
 								 * it is received from dpif_recv() is critical in some benchmarks.  For
 								 * example, if this latency is 1 ms, then a netperf TCP_CRR test, which opens
 								 * and closes TCP connections one at a time as quickly as it can, cannot
 								 * possibly achieve more than 500 transactions per second, since every
 								 * connection consists of two flows with 1-ms latency to set up each one.
 								 *
 								 * To receive upcalls, a client has to enable them with dpif_recv_set().  A
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								 * datapath should generally support being opened multiple times (e.g. so that
 								 * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd"
 								 * is also running) but need not support more than one of these clients
 								 * enabling upcalls at once.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *
 								 * Upcall Queuing and Ordering
 								 * ---------------------------
 								 *
 								 * The datapath's client reads upcalls one at a time by calling dpif_recv().
 								 * When more than one upcall is pending, the order in which the datapath
 								 * presents upcalls to its client is important.  The datapath's client does not
 								 * directly control this order, so the datapath implementer must take care
 								 * during design.
 								 *
 								 * The minimal behavior, suitable for initial testing of a datapath
 								 * implementation, is that all upcalls are appended to a single queue, which is
 								 * delivered to the client in order.
 								 *
 								 * The datapath should ensure that a high rate of upcalls from one particular
 								 * port cannot cause upcalls from other sources to be dropped or unreasonably
 								 * delayed.  Otherwise, one port conducting a port scan or otherwise initiating
 								 * high-rate traffic spanning many flows could suppress other traffic.
 								 * Ideally, the datapath should present upcalls from each port in a "round
 								 * robin" manner, to ensure fairness.
 								 *
 								 * The client has no control over "miss" upcalls and no insight into the
 								 * datapath's implementation, so the datapath is entirely responsible for
 								 * queuing and delivering them.  On the other hand, the datapath has
 								 * considerable freedom of implementation.  One good approach is to maintain a
 								 * separate queue for each port, to prevent any given port's upcalls from
 								 * interfering with other ports' upcalls.  If this is impractical, then another
 								 * reasonable choice is to maintain some fixed number of queues and assign each
 								 * port to one of them.  Ports assigned to the same queue can then interfere
 								 * with each other, but not with ports assigned to different queues.  Other
 								 * approaches are also possible.
 								 *
 								 * The client has some control over "action" upcalls: it can specify a 32-bit
 								 * "Netlink PID" as part of the action.  This terminology comes from the Linux
 								 * datapath implementation, which uses a protocol called Netlink in which a PID
 								 * designates a particular socket and the upcall data is delivered to the
 								 * socket's receive queue.  Generically, though, a Netlink PID identifies a
 								 * queue for upcalls.  The basic requirements on the datapath are:
 								 *
 								 *    - The datapath must provide a Netlink PID associated with each port.  The
 								 *      client can retrieve the PID with dpif_port_get_pid().
 								 *
 								 *    - The datapath must provide a "special" Netlink PID not associated with
 								 *      any port.  dpif_port_get_pid() also provides this PID.  (ovs-vswitchd
 								 *      uses this PID to queue special packets that must not be lost even if a
 								 *      port is otherwise busy, such as packets used for tunnel monitoring.)
 								 *
 								 * The minimal behavior of dpif_port_get_pid() and the treatment of the Netlink
 								 * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value
 								 * and all upcalls are appended to a single queue.
 								 *
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								 * The preferred behavior is:
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 *
 								 *    - Each port has a PID that identifies the queue used for "miss" upcalls
 								 *      on that port.  (Thus, if each port has its own queue for "miss"
 								 *      upcalls, then each port has a different Netlink PID.)
 								 *
 								 *    - "miss" upcalls for a given port and "action" upcalls that specify that
 								 *      port's Netlink PID add their upcalls to the same queue.  The upcalls
 								 *      are delivered to the datapath's client in the order that the packets
 								 *      were received, regardless of whether the upcalls are "miss" or "action"
 								 *      upcalls.
 								 *
 								 *    - Upcalls that specify the "special" Netlink PID are queued separately.
 								 *
 								 *
 								 * Packet Format
 								 * =============
 								 *
 								 * The datapath interface works with packets in a particular form.  This is the
 								 * form taken by packets received via upcalls (i.e. by dpif_recv()).  Packets
 								 * supplied to the datapath for processing (i.e. to dpif_execute()) also take
 								 * this form.
 								 *
 								 * A VLAN tag is represented by an 802.1Q header.  If the layer below the
 								 * datapath interface uses another representation, then the datapath interface
 								 * must perform conversion.
 								 *
 								 * The datapath interface requires all packets to fit within the MTU.  Some
 								 * operating systems internally process packets larger than MTU, with features
 								 * such as TSO and UFO.  When such a packet passes through the datapath
 								 * interface, it must be broken into multiple MTU or smaller sized packets for
 								 * presentation as upcalls.  (This does not happen often, because an upcall
 								 * typically contains the first packet of a flow, which is usually short.)
 								 *
 								 * Some operating system TCP/IP stacks maintain packets in an unchecksummed or
 								 * partially checksummed state until transmission.  The datapath interface
 								 * requires all host-generated packets to be fully checksummed (e.g. IP and TCP
 								 * checksums must be correct).  On such an OS, the datapath interface must fill
 								 * in these checksums.
 								 *
 								 * Packets passed through the datapath interface must be at least 14 bytes
 								 * long, that is, they must have a complete Ethernet header.  They are not
 								 * required to be padded to the minimum Ethernet length.
 								 *
 								 *
 								 * Typical Usage
 								 * =============
 								 *
 								 * Typically, the client of a datapath begins by configuring the datapath with
 								 * a set of ports.  Afterward, the client runs in a loop polling for upcalls to
 								 * arrive.
 								 *
 								 * For each upcall received, the client examines the enclosed packet and
 								 * figures out what should be done with it.  For example, if the client
 								 * implements a MAC-learning switch, then it searches the forwarding database
 								 * for the packet's destination MAC and VLAN and determines the set of ports to
 								 * which it should be sent.  In any case, the client composes a set of datapath
 								 * actions to properly dispatch the packet and then directs the datapath to
 								 * execute those actions on the packet (e.g. with dpif_execute()).
 								 *
 								 * Most of the time, the actions that the client executed on the packet apply
 								 * to every packet with the same flow.  For example, the flow includes both
 								 * destination MAC and VLAN ID (and much more), so this is true for the
 								 * MAC-learning switch example above.  In such a case, the client can also
 								 * direct the datapath to treat any further packets in the flow in the same
 								 * way, using dpif_flow_put() to add a new flow entry.
 								 *
 								 * Other tasks the client might need to perform, in addition to reacting to
 								 * upcalls, include:
 								 *
 								 *    - Periodically polling flow statistics, perhaps to supply to its own
 								 *      clients.
 								 *
 								 *    - Deleting flow entries from the datapath that haven't been used
 								 *      recently, to save memory.
 								 *
 								 *    - Updating flow entries whose actions should change.  For example, if a
 								 *      MAC learning switch learns that a MAC has moved, then it must update
 								 *      the actions of flow entries that sent packets to the MAC at its old
 								 *      location.
 								 *
 								 *    - Adding and removing ports to achieve a new configuration.
-												dpif: Make dpifs thread-safe, and document it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-07-25 10:31:42 -07:00
+								 *
 								 *
 								 * Thread-safety
 								 * =============
 								 *
 								 * Most of the dpif functions are fully thread-safe: they may be called from
 								 * any number of threads on the same or different dpif objects.  The exceptions
 								 * are:
 								 *
 								 *    - dpif_port_poll() and dpif_port_poll_wait() are conditionally
 								 *      thread-safe: they may be called from different threads only on
 								 *      different dpif objects.
 								 *
-												dpif: Make dpif_flow_dump_next() thread-safe.

This patch makes it the caller's responsibility to initialize a
per-thread 'state' object and pass it down to the dpif_flow_dump_next()
implementation. The implementation can expect to be called from multiple
threads with the same 'iter' and different 'state' objects.

When flow_dump_next() returns non-zero, the implementation must ensure
that subsequent calls with the same arguments also return non-zero.
Subsequent calls with the same 'iter' and different 'state' may return
zero, but should make progress towards returning non-zero.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-27 14:13:08 -08:00
+								 *    - dpif_flow_dump_next() is conditionally thread-safe: It may be called
 								 *      from different threads with the same 'struct dpif_flow_dump', but all
 								 *      other parameters must be different for each thread.
 								 *
 								 *    - dpif_flow_dump_done() is conditionally thread-safe: All threads that
 								 *      share the same 'struct dpif_flow_dump' must have finished using it.
 								 *      This function must then be called exactly once for a particular
 								 *      dpif_flow_dump to finish the corresponding flow dump operation.
 								 *
 								 *    - Functions that operate on 'struct dpif_port_dump' are conditionally
 								 *      thread-safe with respect to those objects.  That is, one may dump ports
 								 *      from any number of threads at once, but each thread must use its own
 								 *      struct dpif_port_dump.
-												dpif: Document.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-01-09 14:10:46 -08:00
+								 */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#ifndef DPIF_H
 								#define DPIF_H 1
 								#include <stdbool.h>
 								#include <stddef.h>
 								#include <stdint.h>
-												dpdk: New module with some code from netdev-dpdk.

There's a lot of code in netdev-dpdk which is not at all related to the
netdev interface, mostly the library initialization code.

This commit moves it to a new 'dpdk' module, to simplify 'netdev-dpdk'.

Also a new module 'dpdk-stub' is introduced to implement some functions
when DPDK is not available.  This replaces the old 'netdev-nodpdk'
module.

Some redundant includes are removed or reorganized as a consequence.

No functional change.

CC: Aaron Conole <aconole@redhat.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Aaron Conole <aconole@redhat.com>
Tested-by: Aaron Conole <aconole@redhat.com>

											
										
										
											2016-10-04 17:58:05 -07:00
 								#include "dpdk.h"
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								#include "dp-packet.h"
-												dpif: Meter framework.

Add DPIF-level infrastructure for meters.  Allow meter_set to modify
the meter configuration (e.g. set the burst size if unspecified).

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Andy Zhou <azhou@ovn.org>

											
										
										
											2017-02-23 11:27:54 -08:00
+								#include "netdev.h"
-												ofproto: Avoid user->kernel->user round-trip for many controller actions.

When an OpenFlow flow says to send packets to the controller, until now
ofproto has executed that using dpif_execute(), which passes the packet up
to the kernel.  The kernel queues the packet into its "action" queue, and
then later ofproto pulls the packet back down from the kernel and sends it
to the controller.

However, this is unnecessary.  Open vSwitch can just recognize in advance
that it will get the packet back and handle it directly, skipping the round
trip.  This commit implements this optimization.

This generally affects only the first packet in a flow, since generally the
rest come directly down from the kernel.  It only optimizes the "easy" case
where the first action in a flow is to send the packet to the controller,
since this seems to be the common case in the flows that I'm looking at
now.

											
										
										
											2010-08-04 14:08:26 -07:00
+								#include "openflow/openflow.h"
-												ofp-util, ofp-parse: Break up into many separate modules.

ofp-util had been far too large and monolithic for a long time.  This
commit breaks it up into units that make some logical sense.  It also
moves the pieces of ofp-parse that were specific to each unit into the
relevant unit.

Most of this commit is just moving code around.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-02-09 10:04:26 -08:00
+								#include "openvswitch/ofp-meter.h"
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								#include "ovs-numa.h"
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								#include "packets.h"
-												ofproto: Avoid user->kernel->user round-trip for many controller actions.

When an OpenFlow flow says to send packets to the controller, until now
ofproto has executed that using dpif_execute(), which passes the packet up
to the kernel.  The kernel queues the packet into its "action" queue, and
then later ofproto pulls the packet back down from the kernel and sends it
to the controller.

However, this is unnecessary.  Open vSwitch can just recognize in advance
that it will get the packet back and handle it directly, skipping the round
trip.  This commit implements this optimization.

This generally affects only the first packet in a flow, since generally the
rest come directly down from the kernel.  It only optimizes the "easy" case
where the first action in a flow is to send the packet to the controller,
since this seems to be the common case in the flows that I'm looking at
now.

											
										
										
											2010-08-04 14:08:26 -07:00
+								#include "util.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Add extern "C" to more header files.

From partner.

											
										
										
											2010-02-17 10:36:57 -05:00
+								#ifdef  __cplusplus
 								extern "C" {
 								#endif
-												dpif: Hide the contents of struct dpif.

This helps prepare for multiple dpif implementations, and ensures that
code outside dpif.c does not depend on its internals.

											
										
										
											2009-06-16 10:09:10 -07:00
+								struct dpif;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								struct dpif_class;
 								struct dpif_flow;
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								struct ds;
-												flow: Move flow_extract_stats() to dpif.c, as dpif_flow_stats_extract().

The "flow" module is concerned only with OpenFlow flows these days.  It
shouldn't have anything to do with ODP or dpifs.  However, it included
dpif.h just to implement flow_extract_stats().  This function is a better
fit for dpif.c, so this commit moves it there and removes the dpif.h
#include from flow.h and flow.c

This commit also removes a few more dpif.h #includes that weren't needed.

											
										
										
											2011-09-29 15:36:14 -07:00
+								struct flow;
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								struct flow_wildcards;
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								struct nlattr;
-												dpif: Use sset instead of svec in dpif interface.

											
										
										
											2011-03-25 13:00:13 -07:00
+								struct sset;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												dpif: Allow providers to be managed at runtime.

The list of datapath providers was previously staticly defined at
compile time.  This allows new providers to be added and removed
at runtime.

											
										
										
											2010-02-01 11:36:01 -05:00
+								int dp_register_provider(const struct dpif_class *);
 								int dp_unregister_provider(const char *type);
-												Eliminate "whitelist" and "blacklist" terms.

There is one remaining use under datapath.  That change should happen
upstream in Linux first according to our usual policy.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2020-06-17 14:22:47 -07:00
+								void dp_disallow_provider(const char *type);
-												dpif: Use sset instead of svec in dpif interface.

											
										
										
											2011-03-25 13:00:13 -07:00
+								void dp_enumerate_types(struct sset *types);
-												ofproto: Complete abstraction by adding enumeration and deletion functions.

This eliminates the final reference from bridge.c directly into the dpif
layer, which will make it easier to change the implementation of ofproto
to support other lower layers.

											
										
										
											2011-05-09 09:33:02 -07:00
+								const char *dpif_normalize_type(const char *);
-												dpif: Allow providers to be managed at runtime.

The list of datapath providers was previously staticly defined at
compile time.  This allows new providers to be added and removed
at runtime.

											
										
										
											2010-02-01 11:36:01 -05:00
-												dpif: Use sset instead of svec in dpif interface.

											
										
										
											2011-03-25 13:00:13 -07:00
+								int dp_enumerate_names(const char *type, struct sset *names);
-												dpif: Update dpif interface to match netdev.

This brings over some features that were added to the netdev interface,
most notably the separation between the name and the type.  In addition
to being cleaner, this also avoids problems where it is expected that
the local port has the same name as the datapath.

											
										
										
											2010-01-22 14:37:10 -05:00
+								void dp_parse_name(const char *datapath_name, char **name, char **type);
-												dpif: Add new functions dp_run() and dp_wait().

The upcoming netdev-based dpif needs a hook where it can process packets
and throw them against the flow table, and this provides a suitable place.

											
										
										
											2009-06-19 14:09:09 -07:00
-												dpif: Update dpif interface to match netdev.

This brings over some features that were added to the netdev interface,
most notably the separation between the name and the type.  In addition
to being cleaner, this also avoids problems where it is expected that
the local port has the same name as the datapath.

											
										
										
											2010-01-22 14:37:10 -05:00
+								int dpif_open(const char *name, const char *type, struct dpif **);
 								int dpif_create(const char *name, const char *type, struct dpif **);
 								int dpif_create_and_open(const char *name, const char *type, struct dpif **);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								void dpif_close(struct dpif *);
-												openvswitch: Userspace tunneling.

Following patch adds support for userspace tunneling. Tunneling
needs three more component first is routing table which is configured by
caching kernel routes and second is ARP cache which build automatically
by snooping arp. And third is tunnel protocol table which list all
listening protocols which is populated by vswitchd as tunnel ports
are added. GRE and VXLAN protocol support is added in this patch.

Tunneling works as follows:
On packet receive vswitchd check if this packet is targeted to tunnel
port. If it is then vswitchd inserts tunnel pop action which pops
header and sends packet to tunnel port.
On packet xmit rather than generating Set tunnel action it generate
tunnel push action which has tunnel header data. datapath can use
tunnel-push action data to generate header for each packet and
forward this packet to output port. Since tunnel-push action
contains most of packet header vswitchd needs to lookup routing
table and arp table to build this action.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-11-11 11:53:47 -08:00
+								bool dpif_run(struct dpif *);
-												dpif: Improve abstraction by making 'run' and 'wait' functions per-dpif.

Until now, the dp_run() and dp_wait() functions had to be called at the top
level of the program because they applied to every open dpif.  By replacing
them by functions that take a specific dpif as an argument, we can call
them only from ofproto, which is currently the correct layer to deal with
dpifs.

											
										
										
											2011-05-06 15:04:29 -07:00
+								void dpif_wait(struct dpif *);
-												dpif: Replace dpif_id() by dpif_name().

dpif_id() is often used in error messages, e.g. "dp%u: screwed up".  But
soon we will be generalizing the concept of a datapath, so it is better
to have a function that returns a full name, e.g. "%s: screwed up".
Accordingly, this commit replaces dpif_id() by a new function dpif_name()
that does so.

											
										
										
											2009-06-16 11:00:22 -07:00
+								const char *dpif_name(const struct dpif *);
-												dpif: Update dpif interface to match netdev.

This brings over some features that were added to the netdev interface,
most notably the separation between the name and the type.  In addition
to being cleaner, this also avoids problems where it is expected that
the local port has the same name as the datapath.

											
										
										
											2010-01-22 14:37:10 -05:00
+								const char *dpif_base_name(const struct dpif *);
-												dpif: Add function to get the dpif type.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-01 16:04:06 -07:00
+								const char *dpif_type(const struct dpif *);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												vswitchd: Always cleanup userspace datapath.

'netdev' datapath is implemented within ovs-vswitchd process and can
not exist without it, so it should be gracefully terminated with a
full cleanup of resources upon ovs-vswitchd exit.

This change forces dpif cleanup for 'netdev' datapath regardless of
passing '--cleanup' to 'ovs-appctl exit'. Such solution allowes to
not pass this additional option everytime for userspace datapath
installations and also allowes to not terminate system datapath in
setups where both datapaths runs at the same time.

The main part is that dpif_port_del() will lead to netdev_close()
and subsequent netdev_class->destroy(dev) which will stop HW NICs
and free their resources. For vhost-user interfaces it will invoke
vhost driver unregistering with a properly closed vhost-user
connection. For upcoming AF_XDP netdev this will allow to gracefully
destroy xdp sockets and unload xdp programs from linux interfaces.
Another important thing is that port deletion will also trigger
flushing of flows offloaded to HW NICs.

Exception made for 'internal' ports that could have user ip/route
configuration. These ports will not be removed without '--cleanup'.

This change fixes OVS disappearing from the DPDK point of view
(keeping HW NICs improperly configured, sudden closing of vhost-user
connections) and will help with linux devices clearing with upcoming
AF_XDP netdev support.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Tested-by: William Tu <u9012063@gmail.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-06-24 17:20:17 +03:00
+								bool dpif_cleanup_required(const struct dpif *);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								int dpif_delete(struct dpif *);
-												dpif: Fix minor typo in comment.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-09-25 16:42:45 -07:00
+								/* Statistics for a dpif as a whole. */
-												dpif: Avoid use of  "struct ovs_dp_stats" in platform-independent modules.

Over time we wish to reduce the number of datapath-protocol.h definitions
used directly outside of Linux-specific code.  This commit removes use of
"struct ovs_dp_stats" from platform-independent code.

Bug #7559.

											
										
										
											2011-10-05 11:18:13 -07:00
+								struct dpif_dp_stats {
 								    uint64_t n_hit;             /* Number of flow table matches. */
 								    uint64_t n_missed;          /* Number of flow table misses. */
 								    uint64_t n_lost;            /* Number of misses not sent to userspace. */
 								    uint64_t n_flows;           /* Number of flows present. */
-												dpctl: dpif: Add kernel datapath cache hit output.

This patch adds cache usage statistics to the output:

 $ ovs-dpctl show
 system@ovs-system:
   lookups: hit:24 missed:71 lost:0
   flows: 0
   masks: hit:334 total:0 hit/pkt:3.52
   cache: hit:4 hit-rate:4.21%
   port 0: ovs-system (internal)
   port 1: genev_sys_6081 (geneve: packet_type=ptap)
   port 2: br-int (internal)
   port 3: br-ex (internal)
   port 4: eth2
   port 5: sw1p1 (internal)
   port 6: sw0p4 (internal)

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Paolo Valerio <pvalerio@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-09-06 10:53:42 +02:00
+								    uint64_t n_cache_hit;       /* Number of mega flow mask cache hits for
 								                                   flow table matches. */
-												dpif-linux: collect and display mega flow mask stats

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-10-21 14:37:34 -07:00
+								    uint64_t n_mask_hit;        /* Number of mega flow masks visited for
 								                                   flow table matches. */
-												dpif-linux: fix the size of n_masks

The command ovs-dpctl can wrongly output the masks even if the
datapath does not implement mega flows. In this case the output
will be similar to the following:

system@ovs-system:
	lookups: hit:14 missed:41 lost:0
	flows: 0
	masks: hit:18446744073709551615 total:4294967295
		hit/pkt:335395346794719104.00
	port 0: ovs-system (internal)
	port 1: gre_system (gre: df_default=false, ttl=0)
	port 2: ots-br0 (internal)
	port 3: int0 (internal)
	port 4: vnet0
	port 5: vnet1

The problem depends on the fact that n_masks stats is stored as a
uint32 in the struct ovs_dp_megaflow_stats and as a uint64 in the
struct dpif_dp_stats. UINT32_MAX instead of UINT64_MAX should be
used to detect if the datapath supports megaflows or not.

Signed-off-by: Francesco Fusco <ffusco@redhat.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-17 20:18:18 +01:00
+								    uint32_t n_masks;           /* Number of mega flow masks. */
-												dpif: Avoid use of  "struct ovs_dp_stats" in platform-independent modules.

Over time we wish to reduce the number of datapath-protocol.h definitions
used directly outside of Linux-specific code.  This commit removes use of
"struct ovs_dp_stats" from platform-independent code.

Bug #7559.

											
										
										
											2011-10-05 11:18:13 -07:00
+								};
 								int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
-												dpif: Add support to set user features

This enables user features on the kernel datapath via the DP_CMD_SET
command, and also retrieves them to check for actual support and
not just an older kernel ignoring the requested features.

This will be used in next patch to enable recirc_id sharing with tc.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2019-12-22 12:16:38 +02:00
+								int dpif_set_features(struct dpif *, uint32_t new_features);
-												dpif-netlink: Fix issues of the offloaded flows counter.

The n_offloaded_flows counter is saved in dpif, and this is the first
one when ofproto is created. When flow operation is done by ovs-appctl
commands, such as, dpctl/add-flow, a new dpif is opened, and the
n_offloaded_flows in it can't be used. So, instead of using counter,
the number of offloaded flows is queried from each netdev, then sum
them up. To achieve this, a new API is added in netdev_flow_api to get
how many flows assigned to a netdev.

In order to get better performance, this number is calculated directly
from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc
takes much time if there are many flows offloaded.

Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-12-17 02:47:32 +00:00
+								int dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows);
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
 								/* Port operations. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Add functions to determine how port should be opened based on type.

Depending on the port and type of datapath, a port may need to be opened
as a different type of device than it's configured.  For example, an
"internal" port on a "dummy" datapath should opened as a "dummy" port.
This commit adds the ability for a dpif to provide this information to a
caller.  It will be used in a future commit.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-11-14 15:50:20 -08:00
+								const char *dpif_port_open_type(const char *datapath_type,
 								                                const char *port_type);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop);
-												netdev, dpif: fix the crash/assert on port delete

a crash is seen in "netdev_ports_remove" when an interface is deleted and added
back in the system and when the interface is part of a bridge configuration.
e.g. steps:
  create a tap0 interface using "ip tuntap add.."
  add the tap0 interface to br0 using "ovs-vsctl add-port.."
  delete the tap0 interface from system using "ip tuntap del.."
  add the tap0 interface back in system using "ip tuntap add.."
                       (this changes the ifindex of the interface)
  delete tap0 from br0 using "ovs-vsctl del-port.."

In the function "netdev_ports_insert", two hmap entries were created for
mapping "portnum -> netdev" and "ifindex -> portnum".
When the interface is deleted from the system, the "netdev_ports_remove"
function is not getting called and the old ifindex entry is not getting
cleaned up from the "ifindex_to_port" hmap.

As part of the fix, added function "dpif_port_remove" which will call
"netdev_ports_remove" in the path where the interface deletion from the system
is detected.
Also, in "netdev_ports_remove", added the code where the "ifindex_to_port_data"
(ifindex -> portnum map node) is getting freed when the ifindex is not
available any more. (as the interface is already deleted.)

VMware-BZ: #1975788
Signed-off-by: Ashish Varma <ashishvarma.ovs@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-11-06 12:17:45 -08:00
+								int dpif_port_del(struct dpif *, odp_port_t port_no, bool local_delete);
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
 								/* A port within a datapath.
 								 *
 								 * 'name' and 'type' are suitable for passing to netdev_open(). */
 								struct dpif_port {
 								    char *name;                 /* Network device name, e.g. "eth0". */
 								    char *type;                 /* Network device type, e.g. "system". */
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								    odp_port_t port_no;         /* Port number within datapath. */
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								};
 								void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
 								void dpif_port_destroy(struct dpif_port *);
-												dpif: Add new dpif_port_exists() function.

Provide the ability to determine whether a port exists in a datapath
without having to deal with a "dpif_port" structure as with
dpif_port_query_by_name().  A future patch will use this function.

Signed-off-by: Justin Pettit <jpettit@nicira.com>

											
										
										
											2012-10-17 23:11:53 -07:00
+								bool dpif_port_exists(const struct dpif *dpif, const char *devname);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no,
-												ofproto-dpif: Fix removal of renamed datapath ports.

OVS configuration is based on port names and OpenFlow port numbers.
Names are stored in the database and translated later to OF ports.
On the datapath level, each port has a name and a datapath port number.
Port name in the database has to match datapath port name, unless it's
a tunnel port.

If a datapath port is renamed with 'ip link set DEV name NAME',
ovs-vswitchd will wake up, destroy all the OpenFlow-related structures
and clean other things up.  This is because the port no longer
represents the port from a database due to a name difference.

However, ovs-vswitch will not actually remove the port from the
datapath, because it thinks that this port is no longer there.  This
is happening because lookup is performed by name and the name have
changed.  As a result we have a port in a datapath that is not related
to any port known to ovs-vswitchd and ovs-vswitchd can't remove it.
This port also occupies a datapath port number and prevents the port
to be added back with a new name.

Fix that by performing lookup by a datapath port number during the port
destruction.  The name was used only to avoid spurious warnings in a
normal case where the port was successfully deleted by other parts of
OVS.  Adding an extra flag to avoid these warnings instead.

Fixes: 02f8d6460afd ("ofproto-dpif: Query port existence by name to prevent warnings.")
Reported-at: https://github.com/openvswitch/ovs-issues/issues/284
Tested-by: Alin-Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin-Gabriel Serdean <aserdean@ovn.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-07-19 18:14:04 +02:00
+								                              struct dpif_port *, bool warn_if_not_found);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								int dpif_port_query_by_name(const struct dpif *, const char *devname,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                            struct dpif_port *);
-												Create specific types for ofp and odp port

Until now, datapath ports and openflow ports were both represented by
unsigned integers of various sizes. With implicit conversions, etc., it is
easy to mix them up and use one where the other is expected.  This commit
creates two typedefs, ofp_port_t and odp_port_t.  Both of these two types
are marked by "__attribute__((bitwise))" so that sparse can be used to
detect any misuse.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 16:58:44 -07:00
+								int dpif_port_get_name(struct dpif *, odp_port_t port_no,
-												dpif: Rename dpif_get_name() to dpif_port_get_name(), update interface.

With multiple kinds of datapaths, code should not just use
"dp%u" along with dpif_minor() to print a datapath name, because not all
datapaths can sensibly be named that way.  We want to use a function
with a name like dpif_get_name() to retrieve a datapath name for printing
to the user, in which case the existing dpif_get_name() function would be
confusing.  So rename the existing one to something more explicit.

											
										
										
											2009-06-15 16:51:46 -07:00
+								                       char *name, size_t name_size);
-												dpif: Remove support for multiple queues per port.

Commit 69c51582ff78 ("dpif-netlink: don't allocate per thread netlink
sockets") removed dpif-netlink support for multiple queues per port.
No remaining dpif provider supports multiple queues per port, so
remove infrastructure for the feature.

CC: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>

											
										
										
											2018-09-25 15:14:13 -07:00
+								uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no);
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
 								struct dpif_port_dump {
 								    const struct dpif *dpif;
 								    int error;
 								    void *state;
 								};
 								void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *);
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *);
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								int dpif_port_dump_done(struct dpif_port_dump *);
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								/* Iterates through each DPIF_PORT in DPIF, using DUMP as state.
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								 *
 								 * Arguments all have pointer type.
 								 *
 								 * If you break out of the loop, then you need to free the dump structure by
 								 * hand using dpif_port_dump_done(). */
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								#define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF)   \
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    for (dpif_port_dump_start(DUMP, DPIF);          \
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								         (dpif_port_dump_next(DUMP, DPIF_PORT)      \
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								          ? true                                    \
 								          : (dpif_port_dump_done(DUMP), false));    \
 								        )
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								int dpif_port_poll(const struct dpif *, char **devnamep);
 								void dpif_port_poll_wait(const struct dpif *);
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
 								/* Flow table operations. */
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								struct dpif_flow_stats {
 								    uint64_t n_packets;
 								    uint64_t n_bytes;
 								    long long int used;
-												Widen TCP flags handling.

Widen TCP flags handling from 7 bits (uint8_t) to 12 bits (uint16_t).
The kernel interface remains at 8 bits, which makes no functional
difference now, as none of the higher bits is currently of interest
to the userspace.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-10-28 13:54:39 -07:00
+								    uint16_t tcp_flags;
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								};
-												Add offload packets statistics

Add argument '--offload-stats' for command ovs-appctl bridge/dump-flows
to display the offloaded packets statistics.

The commands display as below:

orignal command:

ovs-appctl bridge/dump-flows br0

duration=574s, n_packets=1152, n_bytes=110768, priority=0,actions=NORMAL
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=574s, n_packets=0, n_bytes=0, priority=0,reg0=0x3,actions=drop

new command with argument '--offload-stats'

Notice: 'n_offload_packets' are a subset of n_packets and 'n_offload_bytes' are
a subset of n_bytes.

ovs-appctl bridge/dump-flows --offload-stats br0

duration=582s, n_packets=1152, n_bytes=110768, n_offload_packets=1107, n_offload_bytes=107992, priority=0,actions=NORMAL
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=2,recirc_id=0,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x1,actions=controller(reason=)
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x2,actions=drop
table_id=254, duration=582s, n_packets=0, n_bytes=0, n_offload_packets=0, n_offload_bytes=0, priority=0,reg0=0x3,actions=drop

Signed-off-by: zhaozhanxu <zhaozhanxu@163.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2019-12-05 14:26:25 +08:00
+								/* more statistics info for offloaded packets and bytes */
 								struct dpif_flow_detailed_stats {
 								    uint64_t n_packets;
 								    uint64_t n_bytes;
 								    /* n_offload_packets are a subset of n_packets */
 								    uint64_t n_offload_packets;
 								    /* n_offload_bytes are a subset of n_bytes */
 								    uint64_t n_offload_bytes;
 								    long long int used;
 								    uint16_t tcp_flags;
 								};
-												dpctl: Properly reflect a rule's offloaded to HW state

Previously, any rule that is offloaded via a netdev, not necessarily
to the HW, would be reported as "offloaded". This patch fixes this
misalignment, and introduces the 'dp' state, as follows:

rule is in HW via TC offload  -> offloaded=yes dp:tc
rule is in not HW over TC DP  -> offloaded=no  dp:tc
rule is in not HW over OVS DP -> offloaded=no  dp:ovs

To achieve this, the flows's 'offloaded' flag was encapsulated in a new
attrs struct, which contains the offloaded state of the flow and the
DP layer the flow is handled in, and instead of setting the flow's
'offloaded' state based solely on the type of dump it was acquired
via, for netdev flows it now sends the new attrs struct to be
collected along with the rest of the flow via the netdev, allowing
it to be set per flow.

For TC offloads, the offloaded state is set based on the 'in_hw' and
'not_in_hw' flags received from the TC as part of the flower. If no
such flag was received, due to lack of kernel support, it defaults
to true.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
[simon: resolved conflict in lib/dpctl.man]
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-06-07 09:36:59 +03:00
+								struct dpif_flow_attrs {
-												dpif: Fix dp_extra_info leak by reworking the allocation scheme.

dpctl module leaks the 'dp_extra_info' in case the dumped flow doesn't
fit the dump filter while executing dpctl/dump-flows and also while
executing dpctl/get-flow.

This is already a 3rd attempt to fix all the leaks and incorrect usage
of this string that definitely indicates poor initial design of the
feature.

Flow dump/get documentation clearly states that the caller does not own
the data provided in dpif_flow.  Datapath still owns all the data and
promises to not free/modify it until the next quiescent period, however
we're requesting the caller to free 'dp_extra_info' and this obviously
breaks the rules.

This patch fixes the issue by by storing 'dp_extra_info' within
'struct dp_netdev_flow' making datapath to own it.  'dp_netdev_flow'
is RCU-protected, so it will be valid until the next quiescent period.

Fixes: 0e8f5c6a38d0 ("dpif-netdev: Modified ovs-appctl dpctl/dump-flows command")
Tested-by: Emma Finn <emma.finn@intel.com>
Acked-by: Emma Finn <emma.finn@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-01-23 19:10:05 +01:00
+								    bool offloaded;            /* True if flow is offloaded to HW. */
 								    const char *dp_layer;      /* DP layer the flow is handled in. */
 								    const char *dp_extra_info; /* Extra information provided by DP. */
-												dpctl: Properly reflect a rule's offloaded to HW state

Previously, any rule that is offloaded via a netdev, not necessarily
to the HW, would be reported as "offloaded". This patch fixes this
misalignment, and introduces the 'dp' state, as follows:

rule is in HW via TC offload  -> offloaded=yes dp:tc
rule is in not HW over TC DP  -> offloaded=no  dp:tc
rule is in not HW over OVS DP -> offloaded=no  dp:ovs

To achieve this, the flows's 'offloaded' flag was encapsulated in a new
attrs struct, which contains the offloaded state of the flow and the
DP layer the flow is handled in, and instead of setting the flow's
'offloaded' state based solely on the type of dump it was acquired
via, for netdev flows it now sends the new attrs struct to be
collected along with the rest of the flow via the netdev, allowing
it to be set per flow.

For TC offloads, the offloaded state is set based on the 'in_hw' and
'not_in_hw' flags received from the TC as part of the flower. If no
such flag was received, due to lack of kernel support, it defaults
to true.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
[simon: resolved conflict in lib/dpctl.man]
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-06-07 09:36:59 +03:00
+								};
-												dpctl: Expand the flow dump type filter

Added new types to the flow dump filter, and allowed multiple filter
types to be passed at once, as a comma separated list. The new types
added are:
 * tc - specifies flows handled by the tc dp
 * non-offloaded - specifies flows not offloaded to the HW
 * all - specifies flows of all types

The type list is now fully parsed by the dpctl, and a new struct was
added to dpif which enables dpctl to define which types of dumps to
provide, rather than passing the type string and having dpif parse it.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-08-10 11:30:08 +03:00
+								struct dpif_flow_dump_types {
 								    bool ovs_flows;
 								    bool netdev_flows;
 								};
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								void dpif_flow_stats_extract(const struct flow *, const struct dp_packet *packet,
-												dpif: Add 'used' argument to dpif_flow_stats_extract().

The following commit will need to use a value other than a literal
time_msec() in one case.  This commit is just preparation.

Factoring the time_msec() call out of the loop in
handle_flow_miss_without_facet() is a really minor optimization.  It isn't
the main point here.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-08-17 23:27:39 -07:00
+								                             long long int used, struct dpif_flow_stats *);
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *);
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								enum dpif_flow_put_flags {
 								    DPIF_FP_CREATE = 1 << 0,    /* Allow creating a new flow. */
 								    DPIF_FP_MODIFY = 1 << 1,    /* Allow modifying an existing flow. */
-												dpif: Use OVS_FLOW_ATTR_PROBE.

Use the new OVS_FLOW_ATTR_PROBE flag when probing for datapath feature
support.  Suppress also dpif error logging when requested, as probe
failures are already logged at ofproto-dpif.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-09-12 11:20:13 -07:00
+								    DPIF_FP_ZERO_STATS = 1 << 2, /* Zero the stats of an existing flow. */
 								    DPIF_FP_PROBE = 1 << 3      /* Suppress error messages, if any. */
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								};
-												dpif: Refactor datapath feature detection.

Various functions in ofproto-dpif and dpif-netlink detect support for
features in very similar ways. Refactor their common code to a single
function.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-29 14:00:14 -07:00
+								bool dpif_probe_feature(struct dpif *, const char *name,
-												dpif: Refactor dpif_probe_feature()

Allow actions to be part of the probe. No functional changes.
Future patch will make use this new API.

Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>

											
										
										
											2017-03-09 11:01:57 -08:00
+								                        const struct ofpbuf *key, const struct ofpbuf *actions,
 								                        const ovs_u128 *ufid);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								int dpif_flow_flush(struct dpif *);
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                  const struct nlattr *key, size_t key_len,
-												ovs-dpctl: Add mega flow support

Added support to allow mega flow specified and displayed. ovs-dpctl tool
is mainly used as debugging tool.

This patch also implements the low level user space routines to send
and receive mega flow netlink messages. Those netlink suppor
routines are required for forthcoming user space mega flow patches.

Added a unit test to test parsing and display of mega flows.

Ethan contributed the ovs-dpctl mega flow output function.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 07:15:10 +00:00
+								                  const struct nlattr *mask, size_t mask_len,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                  const struct nlattr *actions, size_t actions_len,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								                  const ovs_u128 *ufid, const unsigned pmd_id,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								                  struct dpif_flow_stats *);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								int dpif_flow_del(struct dpif *,
 								                  const struct nlattr *key, size_t key_len,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								                  const ovs_u128 *ufid, const unsigned pmd_id,
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								                  struct dpif_flow_stats *);
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								int dpif_flow_get(struct dpif *,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                  const struct nlattr *key, size_t key_len,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								                  const ovs_u128 *ufid, const unsigned pmd_id,
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								                  struct ofpbuf *, struct dpif_flow *);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
 								/* Flow dumping interface
 								 * ======================
 								 *
 								 * This interface allows iteration through all of the flows currently installed
 								 * in a datapath.  It is somewhat complicated by two requirements:
 								 *
 								 *    - Efficient support for dumping flows in parallel from multiple threads.
 								 *
 								 *    - Allow callers to avoid making unnecessary copies of data returned by
 								 *      the interface across several flows in cases where the dpif
 								 *      implementation has to maintain a copy of that information anyhow.
 								 *      (That is, allow the client visibility into any underlying batching as
 								 *      part of its own batching.)
 								 *
 								 *
 								 * Usage
 								 * -----
 								 *
 								 * 1. Call dpif_flow_dump_create().
 								 * 2. In each thread that participates in the dump (which may be just a single
 								 *    thread if parallelism isn't important):
 								 *        (a) Call dpif_flow_dump_thread_create().
 								 *        (b) Call dpif_flow_dump_next() repeatedly until it returns 0.
 								 *        (c) Call dpif_flow_dump_thread_destroy().
 								 * 3. Call dpif_flow_dump_destroy().
 								 *
 								 * All error reporting is deferred to the call to dpif_flow_dump_destroy().
 								 */
-												dpctl: Add an option to dump only certain kinds of flows

Usage:
    # to dump all datapath flows (default):
    ovs-dpctl dump-flows

    # to dump only flows that in kernel datapath:
    ovs-dpctl dump-flows type=ovs

    # to dump only flows that are offloaded:
    ovs-dpctl dump-flows type=offloaded

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2017-06-13 18:03:49 +03:00
+								struct dpif_flow_dump *dpif_flow_dump_create(const struct dpif *, bool terse,
-												dpctl: Expand the flow dump type filter

Added new types to the flow dump filter, and allowed multiple filter
types to be passed at once, as a comma separated list. The new types
added are:
 * tc - specifies flows handled by the tc dp
 * non-offloaded - specifies flows not offloaded to the HW
 * all - specifies flows of all types

The type list is now fully parsed by the dpctl, and a new struct was
added to dpif which enables dpctl to define which types of dumps to
provide, rather than passing the type string and having dpif parse it.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-08-10 11:30:08 +03:00
+								                                             struct dpif_flow_dump_types *);
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								int dpif_flow_dump_destroy(struct dpif_flow_dump *);
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								struct dpif_flow_dump_thread *dpif_flow_dump_thread_create(
 								    struct dpif_flow_dump *);
 								void dpif_flow_dump_thread_destroy(struct dpif_flow_dump_thread *);
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								#define PMD_ID_NULL OVS_CORE_UNSPEC
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								/* A datapath flow as dumped by dpif_flow_dump_next(). */
 								struct dpif_flow {
 								    const struct nlattr *key;     /* Flow key, as OVS_KEY_ATTR_* attrs. */
 								    size_t key_len;               /* 'key' length in bytes. */
 								    const struct nlattr *mask;    /* Flow mask, as OVS_KEY_ATTR_* attrs. */
 								    size_t mask_len;              /* 'mask' length in bytes. */
 								    const struct nlattr *actions; /* Actions, as OVS_ACTION_ATTR_ */
 								    size_t actions_len;           /* 'actions' length in bytes. */
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    ovs_u128 ufid;                /* Unique flow identifier. */
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								    bool ufid_present;            /* True if 'ufid' was provided by datapath.*/
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;              /* Datapath poll mode driver id. */
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								    struct dpif_flow_stats stats; /* Flow statistics. */
-												dpctl: Properly reflect a rule's offloaded to HW state

Previously, any rule that is offloaded via a netdev, not necessarily
to the HW, would be reported as "offloaded". This patch fixes this
misalignment, and introduces the 'dp' state, as follows:

rule is in HW via TC offload  -> offloaded=yes dp:tc
rule is in not HW over TC DP  -> offloaded=no  dp:tc
rule is in not HW over OVS DP -> offloaded=no  dp:ovs

To achieve this, the flows's 'offloaded' flag was encapsulated in a new
attrs struct, which contains the offloaded state of the flow and the
DP layer the flow is handled in, and instead of setting the flow's
'offloaded' state based solely on the type of dump it was acquired
via, for netdev flows it now sends the new attrs struct to be
collected along with the rest of the flow via the netdev, allowing
it to be set per flow.

For TC offloads, the offloaded state is set based on the 'in_hw' and
'not_in_hw' flags received from the TC as part of the flower. If no
such flag was received, due to lack of kernel support, it defaults
to true.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Acked-by: Roi Dayan <roid@mellanox.com>
[simon: resolved conflict in lib/dpctl.man]
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-06-07 09:36:59 +03:00
+								    struct dpif_flow_attrs attrs; /* Flow attributes. */
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								};
-												dpif: Refactor flow dumping interface to make better sense for batching.

Commit a6ce4b9d251 (ofproto-dpif-upcall: Avoid use-after-free in
revalidate() corner case.) showed that it is somewhat tricky to correctly
use the existing dpif flow dumping interface to obtain batches of flows.
One has to be careful about calling dpif_flow_dump_next_may_destroy_keys()
before going on to the next flow.

A better interface is possible, one that is naturally oriented toward
retrieving batches when that is a useful optimization.  This commit
replaces the dpif interface by such a design, and updates both the
implementations and the callers to adopt it.

This is a fairly large change, but I think that the code in
ofproto-dpif-upcall is easier to understand after the change.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-20 11:37:02 -07:00
+								int dpif_flow_dump_next(struct dpif_flow_dump_thread *,
 								                        struct dpif_flow *flows, int max_flows);
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
 								#define DPIF_FLOW_BUFSIZE 2048
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
 								/* Operation batching interface.
 								 *
 								 * Some datapaths are faster at performing N operations together than the same
 								 * N operations individually, hence an interface for batching.
 								 */
 								enum dpif_op_type {
 								    DPIF_OP_FLOW_PUT = 1,
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								    DPIF_OP_FLOW_DEL,
 								    DPIF_OP_EXECUTE,
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								    DPIF_OP_FLOW_GET,
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								};
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								/* offload_type argument types to (*operate) interface */
 								enum dpif_offload_type {
 								    DPIF_OFFLOAD_AUTO,         /* Offload if possible, fallback to software. */
 								    DPIF_OFFLOAD_NEVER,        /* Never offload to hardware. */
 								    DPIF_OFFLOAD_ALWAYS,       /* Always offload to hardware. */
 								};
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								/* Add or modify a flow.
 								 *
 								 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
 								 * the 'key_len' bytes starting at 'key'.  The associated actions are specified
 								 * by the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len'
 								 * bytes starting at 'actions'.
 								 *
 								 *   - If the flow's key does not exist in the dpif, then the flow will be
 								 *     added if 'flags' includes DPIF_FP_CREATE.  Otherwise the operation will
 								 *     fail with ENOENT.
 								 *
 								 *     If the operation succeeds, then 'stats', if nonnull, will be zeroed.
 								 *
 								 *   - If the flow's key does exist in the dpif, then the flow's actions will
 								 *     be updated if 'flags' includes DPIF_FP_MODIFY.  Otherwise the operation
 								 *     will fail with EEXIST.  If the flow's actions are updated, then its
 								 *     statistics will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and
 								 *     left as-is otherwise.
 								 *
 								 *     If the operation succeeds, then 'stats', if nonnull, will be set to the
 								 *     flow's statistics before the update.
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 *
 								 *   - If the datapath implements multiple pmd thread with its own flow
 								 *     table, 'pmd_id' should be used to specify the particular polling
-												dpctl: Avoid making assumptions on pmd threads.

Currently dpctl depends on ovs-numa module to delete and create flows on
different pmd threads for pmd devices.

The next commits will move away the pmd threads state from ovs-numa to
dpif-netdev, so the ovs-numa interface will not be supported.

Also, the assignment between ports and thread is an implementation
detail of dpif-netdev, dpctl shouldn't know anything about it.

This commit changes the dpif_flow_put() and dpif_flow_del() calls to
iterate over all the pmd threads, if pmd_id is PMD_ID_NULL.

A simple test is added.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>

											
										
										
											2016-11-15 15:40:49 -08:00
+								 *     thread for the operation. PMD_ID_NULL means that the flow should
 								 *     be put on all the polling threads.
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								 */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								struct dpif_flow_put {
 								    /* Input. */
 								    enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
 								    const struct nlattr *key;       /* Flow to put. */
 								    size_t key_len;                 /* Length of 'key' in bytes. */
-												ovs-dpctl: Add mega flow support

Added support to allow mega flow specified and displayed. ovs-dpctl tool
is mainly used as debugging tool.

This patch also implements the low level user space routines to send
and receive mega flow netlink messages. Those netlink suppor
routines are required for forthcoming user space mega flow patches.

Added a unit test to test parsing and display of mega flows.

Ethan contributed the ovs-dpctl mega flow output function.

Co-authored-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Andy Zhou <azhou@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-19 07:15:10 +00:00
+								    const struct nlattr *mask;      /* Mask to put. */
 								    size_t mask_len;                /* Length of 'mask' in bytes. */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								    const struct nlattr *actions;   /* Actions to perform on flow. */
 								    size_t actions_len;             /* Length of 'actions' in bytes. */
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								    const ovs_u128 *ufid;           /* Optional unique flow identifier. */
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;                /* Datapath poll mode driver id. */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
 								    /* Output. */
 								    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
 								};
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								/* Delete a flow.
 								 *
 								 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								 * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If
 								 * the flow was created using 'ufid', then 'ufid' must be specified to delete
 								 * the flow. If both are specified, 'key' will be ignored for flow deletion.
 								 * Succeeds with status 0 if the flow is deleted, or fails with ENOENT if the
 								 * dpif does not contain such a flow.
 								 *
 								 * Callers should always provide the 'key' to improve dpif logging in the event
 								 * of errors or unexpected behaviour.
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								 *
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 * If the datapath implements multiple polling thread with its own flow table,
 								 * 'pmd_id' should be used to specify the particular polling thread for the
-												dpctl: Avoid making assumptions on pmd threads.

Currently dpctl depends on ovs-numa module to delete and create flows on
different pmd threads for pmd devices.

The next commits will move away the pmd threads state from ovs-numa to
dpif-netdev, so the ovs-numa interface will not be supported.

Also, the assignment between ports and thread is an implementation
detail of dpif-netdev, dpctl shouldn't know anything about it.

This commit changes the dpif_flow_put() and dpif_flow_del() calls to
iterate over all the pmd threads, if pmd_id is PMD_ID_NULL.

A simple test is added.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>

											
										
										
											2016-11-15 15:40:49 -08:00
+								 * operation. PMD_ID_NULL means that the flow should be deleted from all the
 								 * polling threads.
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 *
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								 * If the operation succeeds, then 'stats', if nonnull, will be set to the
 								 * flow's statistics before its deletion. */
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								struct dpif_flow_del {
 								    /* Input. */
 								    const struct nlattr *key;       /* Flow to delete. */
 								    size_t key_len;                 /* Length of 'key' in bytes. */
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    const ovs_u128 *ufid;           /* Unique identifier of flow to delete. */
-												dpif: Shift ufid support checking up to dpif_backer.

Previously, the dpif layer was responsible for determining datapath
support for UFIDs, which resulted in all ovs-dpctl utilities
inserting/deleting flows from the datapath each time they are run.
Shift this responsibility up to the dpif_backer.

There are two users of this functionality: Revalidators check for UFID
support to request a terser dump using UFIDs, and dpif-netlink uses this
to request flow_del operations to only return the UFID/stats. The latter
case was previously hidden from revalidators, but this change makes them
aware of it, and reuses the same "udpif->enable_ufid" flag for reducing
overhead of both flow dump and flow delete.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>

											
										
										
											2014-12-16 17:44:40 -08:00
+								    bool terse;                     /* OK to skip sending/receiving full flow
 								                                     * info? */
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;                /* Datapath poll mode driver id. */
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
 								    /* Output. */
 								    struct dpif_flow_stats *stats;  /* Optional flow statistics. */
 								};
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								/* Executes actions on a specified packet.
 								 *
 								 * Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
 								 * frame in 'packet' and on the packet metadata in 'md'.  May modify both
 								 * 'packet' and 'md'.
 								 *
 								 * Some dpif providers do not implement every action.  The Linux kernel
 								 * datapath, in particular, does not implement ARP field modification.  If
 								 * 'needs_help' is true, the dpif layer executes in userspace all of the
 								 * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and
 								 * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif
 								 * implementation.
 								 *
 								 * This works even if 'actions_len' is too long for a Netlink attribute. */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								struct dpif_execute {
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								    /* Input. */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								    const struct nlattr *actions;   /* Actions to execute on packet. */
 								    size_t actions_len;             /* Length of 'actions' in bytes. */
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
+								    bool needs_help;
-												dpif: Use OVS_FLOW_ATTR_PROBE.

Use the new OVS_FLOW_ATTR_PROBE flag when probing for datapath feature
support.  Suppress also dpif error logging when requested, as probe
failures are already logged at ofproto-dpif.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2014-09-12 11:20:13 -07:00
+								    bool probe;                     /* Suppress error messages. */
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								    unsigned int mtu;               /* Maximum transmission unit to fragment.
 if not a fragmented packet */
-												dpif: Fix use of uninitialized execute hash.

'dpif_execute_helper_cb' doesn't initilalize the 'hash' field that
may be passed down to datapath and might cause execution of a different
set of actions, e.g. on recirculation.

 Thread 6 handler27:
 Conditional jump or move depends on uninitialised value(s)
    at 0x53A2C2: dpif_netlink_encode_execute (dpif-netlink.c:1841)
    by 0x53A2C2: dpif_netlink_operate__ (dpif-netlink.c:1919)
    by 0x53A82D: dpif_netlink_operate_chunks (dpif-netlink.c:2238)
    by 0x53A82D: dpif_netlink_operate (dpif-netlink.c:2297)
    by 0x48135F: dpif_operate (dpif.c:1366)
    by 0x481923: dpif_execute.part.24 (dpif.c:1320)
    by 0x481C46: dpif_execute (dpif.c:1312)
    by 0x481C46: dpif_execute_helper_cb (dpif.c:1243)
    by 0x4AE943: odp_execute_actions (odp-execute.c:865)
    by 0x47F272: dpif_execute_with_help (dpif.c:1296)
    by 0x4812FF: dpif_operate (dpif.c:1422)
    by 0x442226: handle_upcalls (ofproto-dpif-upcall.c:1617)
    by 0x442226: recv_upcalls.isra.36 (ofproto-dpif-upcall.c:855)
    by 0x442351: udpif_upcall_handler (ofproto-dpif-upcall.c:755)
    by 0x4FDE2C: ovsthread_wrapper (ovs-thread.c:383)
    by 0x5E19159: start_thread (in /usr/lib64/libpthread-2.28.so)
    by 0x69ECF72: clone (in /usr/lib64/libc-2.28.so)
  Uninitialised value was created by a stack allocation
    at 0x481966: dpif_execute_helper_cb (dpif.c:1159)

Additionally added a missing comment to the 'struct dpif_execute'.

Fixes: 0442bfb11d6c ("ofproto-dpif-upcall: Echo HASH attribute back to datapath.")
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-04-04 14:43:52 +02:00
+								    uint64_t hash;                  /* Packet flow hash. 0 if not specified. */
-												dpif: Pass flow parameter to dpif_execute().

All the callers of the function already have a copy of the extracted
flow in their stack (or a few frames before).

This is useful for different resons:
* It forces the callers to also call flow_extract() on the packet, which
  is necessary to initialize the l2,l3,l4 pointers.
* It will be used in the userspace datapath to generate the RSS hash by
  a following commit
* It can be used by the userspace connection tracker to avoid extracting
  the l3 type again.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-05-17 18:26:02 -07:00
+								    const struct flow *flow;         /* Flow extracted from 'packet'. */
-												dpif-provider: Get rid of redundant operations.

The dpif provider 'operate' call duplicates all of the features available
from the 'flow_put', 'flow_del', and 'execute' calls, yielding redundant
code in providers that support both mechanisms.  This change drops the
latter calls in favor of making every dpif provider support 'operate'.
The result is code that is overall less duplicative.

It might make sense to do the same with flow_get but so far 'operate'
doesn't support flow_get.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-15 16:09:40 -07:00
 								    /* Input, but possibly modified as a side effect of execution. */
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								    struct dp_packet *packet;          /* Packet to execute. */
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								};
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								/* Queries the dpif for a flow entry.
 								 *
 								 * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								 * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If
 								 * the flow was created using 'ufid', then 'ufid' must be specified to fetch
 								 * the flow. If both are specified, 'key' will be ignored for the flow query.
 								 * 'buffer' must point to an initialized buffer, with a recommended size of
 								 * DPIF_FLOW_BUFSIZE bytes.
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								 *
-												dpif: Fix leak and usage of uninitialized dp_extra_info.

'dpif_probe_feature'/'revalidate' doesn't free the 'dp_extra_info'
string.  Also, all the implementations of dpif_flow_get() should
initialize the value to avoid printing/freeing of random memory.

 30 bytes in 1 blocks are definitely lost in loss record 323 of 889
    at 0x483AD19: realloc (vg_replace_malloc.c:836)
    by 0xDDAD89: xrealloc (util.c:149)
    by 0xCE1609: ds_reserve (dynamic-string.c:63)
    by 0xCE1A90: ds_put_format_valist (dynamic-string.c:161)
    by 0xCE19B9: ds_put_format (dynamic-string.c:142)
    by 0xCCCEA9: dp_netdev_flow_to_dpif_flow (dpif-netdev.c:3170)
    by 0xCCD2DD: dpif_netdev_flow_get (dpif-netdev.c:3278)
    by 0xCCEA0A: dpif_netdev_operate (dpif-netdev.c:3868)
    by 0xCDF81B: dpif_operate (dpif.c:1361)
    by 0xCDEE93: dpif_flow_get (dpif.c:1002)
    by 0xCDECF9: dpif_probe_feature (dpif.c:962)
    by 0xC635D2: check_recirc (ofproto-dpif.c:896)
    by 0xC65C02: check_support (ofproto-dpif.c:1567)
    by 0xC63274: open_dpif_backer (ofproto-dpif.c:818)
    by 0xC65E3E: construct (ofproto-dpif.c:1605)
    by 0xC4D436: ofproto_create (ofproto.c:549)
    by 0xC3931A: bridge_reconfigure (bridge.c:877)
    by 0xC3FEAC: bridge_run (bridge.c:3324)
    by 0xC4551D: main (ovs-vswitchd.c:127)

CC: Emma Finn <emma.finn@intel.com>
Fixes: 0e8f5c6a38d0 ("dpif-netdev: Modified ovs-appctl dpctl/dump-flows command")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Roi Dayan <roid@mellanox.com>

											
										
										
											2020-01-17 23:00:05 +01:00
+								 * On success, 'flow' will be populated with the mask, actions, stats and attrs
 								 * for the datapath flow corresponding to 'key'. The mask and actions may point
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								 * within '*buffer', or may point at RCU-protected data. Therefore, callers
 								 * that wish to hold these over quiescent periods must make a copy of these
-												dpif: Fix dp_extra_info leak by reworking the allocation scheme.

dpctl module leaks the 'dp_extra_info' in case the dumped flow doesn't
fit the dump filter while executing dpctl/dump-flows and also while
executing dpctl/get-flow.

This is already a 3rd attempt to fix all the leaks and incorrect usage
of this string that definitely indicates poor initial design of the
feature.

Flow dump/get documentation clearly states that the caller does not own
the data provided in dpif_flow.  Datapath still owns all the data and
promises to not free/modify it until the next quiescent period, however
we're requesting the caller to free 'dp_extra_info' and this obviously
breaks the rules.

This patch fixes the issue by by storing 'dp_extra_info' within
'struct dp_netdev_flow' making datapath to own it.  'dp_netdev_flow'
is RCU-protected, so it will be valid until the next quiescent period.

Fixes: 0e8f5c6a38d0 ("dpif-netdev: Modified ovs-appctl dpctl/dump-flows command")
Tested-by: Emma Finn <emma.finn@intel.com>
Acked-by: Emma Finn <emma.finn@intel.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-01-23 19:10:05 +01:00
+								 * fields before quiescing.
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								 *
-												dpif: Index flows using unique identifiers.

This patch modifies the dpif interface to allow flows to be manipulated
using a 128-bit identifier. This allows revalidator threads to perform
datapath operations faster, as they do not need to serialise the entire
flow key for operations like flow_get and flow_delete. In conjunction
with a future patch to simplify the dump interface, this provides a
significant performance benefit for revalidation.

When handlers assemble flow_put operations, they specify a unique
identifier (UFID) for each flow as it is passed down to the datapath to
be stored with the flow. The UFID is currently provided to handlers
by the dpif during upcall processing.

When revalidators assemble flow_get or flow_del operations, they may
specify the UFID for the flow along with the key. The dpif will decide
whether to send only the UFID to the datapath, or both the UFID and flow
key. The former is preferred for newer datapaths that support UFID,
while the latter is used for backwards compatibility.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 16:26:35 +12:00
+								 * Callers should always provide 'key' to improve dpif logging in the event of
 								 * errors or unexpected behaviour.
 								 *
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 * If the datapath implements multiple polling thread with its own flow table,
 								 * 'pmd_id' should be used to specify the particular polling thread for the
-												dpctl: Avoid making assumptions on pmd threads.

Currently dpctl depends on ovs-numa module to delete and create flows on
different pmd threads for pmd devices.

The next commits will move away the pmd threads state from ovs-numa to
dpif-netdev, so the ovs-numa interface will not be supported.

Also, the assignment between ports and thread is an implementation
detail of dpif-netdev, dpctl shouldn't know anything about it.

This commit changes the dpif_flow_put() and dpif_flow_del() calls to
iterate over all the pmd threads, if pmd_id is PMD_ID_NULL.

A simple test is added.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>

											
										
										
											2016-11-15 15:40:49 -08:00
+								 * operation. PMD_ID_NULL means that the datapath will return the first
 								 * matching flow from any poll thread.
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 *
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								 * Succeeds with status 0 if the flow is fetched, or fails with ENOENT if no
 								 * such flow exists. Other failures are indicated with a positive errno value.
 								 */
 								struct dpif_flow_get {
 								    /* Input. */
 								    const struct nlattr *key;       /* Flow to get. */
 								    size_t key_len;                 /* Length of 'key' in bytes. */
-												dpif: Minimize memory copy for revalidation.

One of the limiting factors on the number of flows that can be supported
in the datapath is the overhead of assembling flow dump messages in the
datapath. This patch modifies the dpif to allow revalidators to skip
dumping the key, mask and actions from the datapath, by making use of
the unique flow identifiers introduced in earlier patches.

For each flow dump, the dpif user specifies whether to skip these
attributes, allowing the common case to only dump a pair of 128-bit ID
and flow stats. With datapath support, this increases the number of
flows that a revalidator can handle per second by 50% or more. Support
in dpif-netdev and dpif-netlink is added in this patch; kernel support
is left for future patches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-06 11:14:08 +13:00
+								    const ovs_u128 *ufid;           /* Unique identifier of flow to get. */
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								    unsigned pmd_id;                /* Datapath poll mode driver id. */
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								    struct ofpbuf *buffer;          /* Storage for output parameters. */
 								    /* Output. */
 								    struct dpif_flow *flow;         /* Resulting flow from datapath. */
 								};
-												dpif: Use explicit packet metadata.

This helps reduce confusion about when a flow is a flow and when it is
just metadata.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-30 15:58:58 -08:00
+								int dpif_execute(struct dpif *, struct dpif_execute *);
-												dpif: Factor 'type' and 'error' out of individual dpif_op members.

I'd like to change ->dpif_flow_put() and ->dpif_execute() in the dpif
provider to take the structures of the same names as parameters, instead of
passing them discrete parameters, because this seems like a more sensible
way to do things internally than to have two different ways to pass the
parameters.  It might even simplify code slightly.  But ->flow_put() and
->execute() wouldn't want the 'type' (because it's implied by the function
being called) or 'error' (because it would be the same as the return
value).  Although of course they could just ignore those members, it seems
slightly cleaner to omit them entirely, as this change allows.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:17:55 -08:00
+								struct dpif_op {
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								    enum dpif_op_type type;
-												dpif: Factor 'type' and 'error' out of individual dpif_op members.

I'd like to change ->dpif_flow_put() and ->dpif_execute() in the dpif
provider to take the structures of the same names as parameters, instead of
passing them discrete parameters, because this seems like a more sensible
way to do things internally than to have two different ways to pass the
parameters.  It might even simplify code slightly.  But ->flow_put() and
->execute() wouldn't want the 'type' (because it's implied by the function
being called) or 'error' (because it would be the same as the return
value).  Although of course they could just ignore those members, it seems
slightly cleaner to omit them entirely, as this change allows.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:17:55 -08:00
+								    int error;
 								    union {
 								        struct dpif_flow_put flow_put;
-												ofproto-dpif: Batch flow uninstallations due to expiration.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-04-17 21:52:10 -07:00
+								        struct dpif_flow_del flow_del;
-												dpif: Factor 'type' and 'error' out of individual dpif_op members.

I'd like to change ->dpif_flow_put() and ->dpif_execute() in the dpif
provider to take the structures of the same names as parameters, instead of
passing them discrete parameters, because this seems like a more sensible
way to do things internally than to have two different ways to pass the
parameters.  It might even simplify code slightly.  But ->flow_put() and
->execute() wouldn't want the 'type' (because it's implied by the function
being called) or 'error' (because it would be the same as the return
value).  Although of course they could just ignore those members, it seems
slightly cleaner to omit them entirely, as this change allows.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2011-12-26 14:17:55 -08:00
+								        struct dpif_execute execute;
-												dpif: Support flow_get in dpif_operate().

This cleans up the dpif interface to make it more consistent with the
other dpif operations, and allows flows to be fetched in batches.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-08-13 09:55:54 +12:00
+								        struct dpif_flow_get flow_get;
-												Embrace anonymous unions.

Several OVS structs contain embedded named unions, like this:

struct {
    ...
    union {
        ...
    } u;
};

C11 standardized a feature that many compilers already implemented
anyway, where an embedded union may be unnamed, like this:

struct {
    ...
    union {
        ...
    };
};

This is more convenient because it allows the programmer to omit "u."
in many places.  OVS already used this feature in several places.  This
commit embraces it in several others.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
Tested-by: Alin Gabriel Serdean <aserdean@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@ovn.org>

											
										
										
											2018-05-24 10:32:59 -07:00
+								    };
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								};
-												revalidator: Rebalance offloaded flows based on the pps rate

This is the third patch in the patch-set to support dynamic rebalancing
of offloaded flows.

The dynamic rebalancing functionality is implemented in this patch. The
ukeys that are not scheduled for deletion are obtained and passed as input
to the rebalancing routine. The rebalancing is done in the context of
revalidation leader thread, after all other revalidator threads are
done with gathering rebalancing data for flows.

For each netdev that is in OOR state, a list of flows - both offloaded
and non-offloaded (pending) - is obtained using the ukeys. For each netdev
that is in OOR state, the flows are grouped and sorted into offloaded and
pending flows.  The offloaded flows are sorted in descending order of
pps-rate, while pending flows are sorted in ascending order of pps-rate.

The rebalancing is done in two phases. In the first phase, we try to
offload all pending flows and if that succeeds, the OOR state on the device
is cleared. If some (or none) of the pending flows could not be offloaded,
then we start replacing an offloaded flow that has a lower pps-rate than
a pending flow, until there are no more pending flows with a higher rate
than an offloaded flow. The flows that are replaced from the device are
added into kernel datapath.

A new OVS configuration parameter "offload-rebalance", is added to ovsdb.
The default value of this is "false". To enable this feature, set the
value of this parameter to "true", which provides packets-per-second
rate based policy to dynamically offload and un-offload flows.

Note: This option can be enabled only when 'hw-offload' policy is enabled.
It also requires 'tc-policy' to be set to 'skip_sw'; otherwise, flow
offload errors (specifically ENOSPC error this feature depends on) reported
by an offloaded device are supressed by TC-Flower kernel module.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Co-authored-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Signed-off-by: Venkat Duvvuru <venkatkumar.duvvuru@broadcom.com>
Reviewed-by: Sathya Perla <sathya.perla@broadcom.com>
Reviewed-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2018-10-18 21:43:14 +05:30
+								void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops,
 								                  enum dpif_offload_type);
-												dpctl: Add function to read hardware offload statistics.

Expose a function to query datapath offload statistics.
This function is separate from the current one in netdev-offload
as it exposes more detailed statistics from the datapath, instead of
only from the netdev-offload provider.

Each datapath is meant to use the custom counters as it sees fit for its
handling of hardware offloads.

Call the new API from dpctl.

Signed-off-by: Gaetan Rivet <grive@u256.net>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-09-08 11:47:31 +02:00
 								/* Queries the datapath for hardware offloads stats.
 								 *
 								 * Statistics are written in 'stats' following the 'netdev_custom_stats'
 								 * format. They are allocated on the heap and must be freed by the caller,
 								 * using 'netdev_free_custom_stats_counters'.
 								 */
 								int dpif_offload_stats_get(struct dpif *dpif,
 								                           struct netdev_custom_stats *stats);
-												dpif: Restore a few lines with form feed characters

A few lines with form feed characters (ASCII: ^L) were accidentally
deleted by a recent commit to support rebalancing of offloaded flows.
This patch reverts those lines.

Fixes: 57924fc91c ("revalidator: Rebalance offloaded flows")
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-11-01 00:51:41 +05:30
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								/* Upcalls. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Eliminate ODPL_* from userspace-facing interface.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:14:04 -08:00
+								enum dpif_upcall_type {
 								    DPIF_UC_MISS,               /* Miss in flow table. */
-												datapath: Use "OVS_*" as opposed to "ODP_*" for user<->kernel interactions.

The prefix "ODP_*" is not overly descriptive in the context of the
larger Linux tree.  This commit changes the prefix to "OVS_*" for the
userpace to kernel interactions.  The userspace libraries still use
"ODP_" in many of their interfaces since it is more descriptive in the
OVS oeuvre.

Feature #6904

Signed-off-by: Justin Pettit <jpettit@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-08-18 10:35:40 -07:00
+								    DPIF_UC_ACTION,             /* OVS_ACTION_ATTR_USERSPACE action. */
-												datapath: Convert upcalls and ODP_EXECUTE to use AF_NETLINK socket layer.

This commit calls genl_lock() and thus doesn't support Linux before
2.6.35, which wasn't exported before that version.  That problem will
be fixed once the whole userspace interface transitions to Generic
Netlink a few commits from now.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 13:41:54 -08:00
+								    DPIF_N_UC_TYPES
-												Eliminate ODPL_* from userspace-facing interface.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:14:04 -08:00
+								};
-												dpif: Improve logging of upcalls.

The kernel now provides the entire flow key for a packet sent up to
userspace, but dpif_recv() would only log the in_port.  This change makes
userspace log the entire flow key.

This would have made a bug that I recently looked at a bit easier to
investigate.

											
										
										
											2011-06-08 14:03:47 -07:00
+								const char *dpif_upcall_type_to_string(enum dpif_upcall_type);
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								/* A packet passed up from the datapath to userspace.
 								 *
-												dpif: Allow execute to modify the packet.

Allowing the packet to be modified by execution allows less data
copying for userspace action execution.  Some users of the
dpif_execute already expect that the packet may be modified.  This
patch makes this behavior uniform and makes the userspace datapath and
the execution helpers modify the packet as it is being executed.
Userspace action now steals the packet if given permission, as the
packet is normally not needed after it.  The only exception is the
sample action, and this is accounted for my keeping track of any
actions that could be following the userspace action.

The packet in dpif_upcall is changed from a pointer to a struct,
allowing the packet to be honest about it's headroom.  After this
change the packet can safely be pushed on over the precarious 4 byte
limit earlier allowed by the netlink data preceding the packet.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
											
										
										
											2013-12-16 08:14:52 -08:00
+								 * The 'packet', 'key' and 'userdata' may point into data in a buffer
 								 * provided by the caller, so the buffer should be released only after the
 								 * upcall processing has been finished.
 								 *
 								 * While being processed, the 'packet' may be reallocated, so the packet must
 								 * be separately released with ofpbuf_uninit().
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								 */
 								struct dpif_upcall {
 								    /* All types. */
-												dpif: Reorder elements in dpif_upcall structure.

By reordering the data elements in dpif_upcall structure, pad bytes can
be reduced and also a cache line. Also dp_packet should be the first
member of the structure because rte_mbuf, the first member of dp_packet
should be aligned atleast on a 64-byte boundary.

Before: structure size:768, holes:1, sum padbytes:60, cachelines:12
After: structure size:704, holes:1, sum padbytes:4, cachelines:11

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fischetti@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fischetti@intel.com>
Acked-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-10-14 15:37:12 +01:00
+								    struct dp_packet packet;    /* Packet data,'dp_packet' should be the first
-												dpif: Use spaces instead of tabs.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>

											
										
										
											2017-07-19 18:19:55 -07:00
+								                                   member to avoid a hole. This is because
 								                                   'rte_mbuf' in dp_packet is aligned atleast
 								                                   on a 64-byte boundary */
-												Eliminate ODPL_* from userspace-facing interface.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:14:04 -08:00
+								    enum dpif_upcall_type type;
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								    struct nlattr *key;         /* Flow key. */
 								    size_t key_len;             /* Length of 'key' in bytes. */
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								    ovs_u128 ufid;              /* Unique flow identifier for 'key'. */
-												dpif-netlink: Allow MRU packet attribute.

User space now may receive re-assembled IP fragments. The user space
netlink handler can now accept packets with the new OVS_PACKET_ATTR_MRU
attribute. This allows the kernel to assemble fragmented packets for the
duration of OpenFlow processing, then re-fragment at output time. Most
notably this occurs for packets that are sent through the connection
tracker.

Note that the MRU attribute is not exported at the OpenFlow layer. As
such, if packets are reassembled by conntrack and subsequently sent to
the controller, then OVS has no way to re-serialize the packets to their
original size.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-26 15:52:34 -08:00
+								    struct nlattr *mru;         /* Maximum receive unit. */
-												ofproto-dpif-upcall: Echo HASH attribute back to datapath.

The kernel datapath may sent upcall with hash info,
ovs-vswitchd should get it from upcall and then send
it back.

The reason is that:
| When using the kernel datapath, the upcall don't
| include skb hash info relatived. That will introduce
| some problem, because the hash of skb is important
| in kernel stack. For example, VXLAN module uses
| it to select UDP src port. The tx queue selection
| may also use the hash in stack.
|
| Hash is computed in different ways. Hash is random
| for a TCP socket, and hash may be computed in hardware,
| or software stack. Recalculation hash is not easy.
|
| There will be one upcall, without information of skb
| hash, to ovs-vswitchd, for the first packet of a TCP
| session. The rest packets will be processed in Open vSwitch
| modules, hash kept. If this tcp session is forward to
| VXLAN module, then the UDP src port of first tcp packet
| is different from rest packets.
|
| TCP packets may come from the host or dockers, to Open vSwitch.
| To fix it, we store the hash info to upcall, and restore hash
| when packets sent back.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=bd1903b7c4596ba6f7677d0dfefd05ba5876707d
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-11-15 10:58:59 +08:00
+								    struct nlattr *hash;        /* Packet hash. */
-												ofp-actions: Add truncate action.

The patch adds a new action to support packet truncation.  The new action
is formatted as 'output(port=n,max_len=m)', as output to port n, with
packet size being MIN(original_size, m).

One use case is to enable port mirroring to send smaller packets to the
destination port so that only useful packet information is mirrored/copied,
saving some performance overhead of copying entire packet payload.  Example
use case is below as well as shown in the testcases:

    - Output to port 1 with max_len 100 bytes.
    - The output packet size on port 1 will be MIN(original_packet_size, 100).
    # ovs-ofctl add-flow br0 'actions=output(port=1,max_len=100)'

    - The scope of max_len is limited to output action itself.  The following
      packet size of output:1 and output:2 will be intact.
    # ovs-ofctl add-flow br0 \
            'actions=output(port=1,max_len=100),output:1,output:2'
    - The Datapath actions shows:
    # Datapath actions: trunc(100),1,1,2

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/140037134
Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>

											
										
										
											2016-06-24 07:42:30 -07:00
+								    struct nlattr *cutlen;      /* Number of bytes shrink from the end. */
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
-												Eliminate ODPL_* from userspace-facing interface.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:14:04 -08:00
+								    /* DPIF_UC_ACTION only. */
-												Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.

Until now, the optional OVS_USERSPACE_ATTR_USERDATA attribute had to be
exactly 64 bits long, if it was present.  However, 64 bits is not enough
space to associate as much information with a flow as would be convenient
for some userspace features now under development.  This commit generalizes
the attribute, allowing it to be any length.

This generalization is backward-compatible: if userspace only uses 64-bit
attributes, then it will not see any change in behavior.

CC: Romain Lenglet <rlenglet@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2013-02-15 16:48:32 -08:00
+								    struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
-												Extend OVS IPFIX exporter to export tunnel headers

Extend IPFIX exporter to export tunnel headers when both input and output
of the port.
Add three other_config options in IPFIX table: enable-input-sampling,
enable-output-sampling and enable-tunnel-sampling, to control whether
sampling tunnel info, on which direction (input or output).
Insert sampling action before output action and the output tunnel port
is sent to datapath in the sampling action.
Make datapath collect output tunnel info and send it back to userpace
in upcall message with a new additional optional attribute.
Add a tunnel ports map to make the tunnel port lookup faster in sampling
upcalls in IPFIX exporter. Make the IPFIX exporter generate IPFIX template
sets with enterprise elements for the tunnel info, save the tunnel info
in IPFIX cache entries, and send IPFIX DATA with tunnel info.
Add flowDirection element in IPFIX templates.

Signed-off-by: Wenyu Zhang <wenyuz@vmware.com>
Acked-by: Romain Lenglet <rlenglet@vmware.com>
Acked-by: Ben Pfaff <blp@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-08-17 20:19:36 -07:00
+								    struct nlattr *out_tun_key;    /* Output tunnel key. */
-												Extend sFlow agent to report tunnel and MPLS structures

Packets are still sampled at ingress only, so the egress
tunnel and/or MPLS structures are only included when there is just 1 output
port.  The actions are either provided by the datapath in the sample upcall
or looked up in the userspace cache.  The former is preferred because it is
more reliable and does not present any new demands or constraints on the
userspace cache, however the code falls back on the userspace lookup so that
this solution can work with existing kernel datapath modules. If the lookup
fails it is not critical: the compiled user-action-cookie is still available
and provides the essential output port and output VLAN forwarding information
just as before.

The openvswitch actions can express almost any tunneling/mangling so the only
totally faithful representation would be to somehow encode the whole list of
flow actions in the sFlow output.  However the standard sFlow tunnel structures
can express most common real-world scenarios, so in parsing the actions we
look for those and skip the encoding if we see anything unusual. For example,
a single set(tunnel()) or tnl_push() is interpreted,  but if a second such
action is encountered then the egress tunnel reporting is suppressed.

The sFlow standard allows "best effort" encoding so that if a field is not
knowable or too onerous to look up then it can be left out. This is often
the case for the layer-4 source port or even the src ip address of a tunnel.
The assumption is that monitoring is enabled everywhere so a missing field
can typically be seen at ingress to the next switch in the path.

This patch also adds unit tests to check the sFlow encoding of set(tunnel()),
tnl_push() and push_mpls() actions.

The netlink attribute to request that actions be included in the upcall
from the datapath is inserted for sFlow sampling only.  To make that option
be explicit would require further changes to the printing and parsing of
actions in lib/odp-util.c, and to scripts in the test suite.

Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT
transformations can follow in future patches that make only incremental
changes.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
[blp@nicira.com made stylistic and semantic changes]
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-07-17 21:37:02 -07:00
+								    struct nlattr *actions;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								};
-												ofproto: Avoid user->kernel->user round-trip for many controller actions.

When an OpenFlow flow says to send packets to the controller, until now
ofproto has executed that using dpif_execute(), which passes the packet up
to the kernel.  The kernel queues the packet into its "action" queue, and
then later ofproto pulls the packet back down from the kernel and sends it
to the controller.

However, this is unnecessary.  Open vSwitch can just recognize in advance
that it will get the packet back and handle it directly, skipping the round
trip.  This commit implements this optimization.

This generally affects only the first packet in a flow, since generally the
rest come directly down from the kernel.  It only optimizes the "easy" case
where the first action in a flow is to send the packet to the controller,
since this seems to be the common case in the flows that I'm looking at
now.

											
										
										
											2010-08-04 14:08:26 -07:00
-												dpif-netdev: Purge all ukeys when reconfigure pmd.

When dpdk configuration changes, all pmd threads are recreated
and rx queues of each port are reloaded.  After this process,
rx queue could be mapped to a different pmd thread other than
the one before reconfiguration.  However, this is totally
transparent to ofproto layer modules.  So, if the ofproto-dpif-upcall
module still holds ukeys generated before pmd thread recreation,
this old ukey will collide with the ukey for the new upcalls
from same traffic flow, causing flow installation failure.

To fix the bug, this commit adds a new call-back function
in dpif layer for notifying upper layer the purging of datapath
(e.g. pmd thread deletion in dpif-netdev).  So, the
ofproto-dpif-upcall module can react properly with deleting
the ukeys and with collecting flows' last stats.

Reported-by: Ilya Maximets <i.maximets@samsung.com>
Signed-off-by: Alex Wang <ee07b291@gmail.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>
Tested-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Joe Stringer <joestringer@nicira.com>

											
										
										
											2015-08-25 16:36:46 -07:00
+								/* A callback to notify higher layer of dpif about to be purged, so that
 								 * higher layer could try reacting to this (e.g. grabbing all flow stats
 								 * before they are gone).  This function is currently implemented only by
 								 * dpif-netdev.
 								 *
 								 * The caller needs to provide the 'aux' pointer passed down by higher
 								 * layer from the dpif_register_notify_cb() function and the 'pmd_id' of
 								 * the polling thread.
 								 */
 								    typedef void dp_purge_callback(void *aux, unsigned pmd_id);
 								void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								/* A callback to process an upcall, currently implemented only by dpif-netdev.
 								 *
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								 * The caller provides the 'packet' and 'flow' to process, the corresponding
-												dpif: Turn dpif_flow_hash function into generic odp_flow_key_hash.

Current implementation of dpif_flow_hash() doesn't depend on datapath
interface and only complicates the callers by forcing them to figure
out what is their current 'dpif'.  If we'll need different hashing
for different 'dpif's we'll implement an API for dpif-providers
and each dpif implementation will be able to use their local function
directly without calling it via dpif API.

This change will allow us to not store 'dpif' pointer in the userspace
datapath implementation which is broken and will be removed in next
commits.

This patch moves dpif_flow_hash() to odp-util module and replaces
unused odp_flow_key_hash() by it, along with removing of unused 'dpif'
argument.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-12-08 18:09:53 +01:00
+								 * 'ufid' as generated by odp_flow_key_hash(), the polling thread id 'pmd_id',
-												dpif-netdev: Add per-pmd flow-table/classifier.

This commit changes the per dpif-netdev datapath flow-table/
classifier to per pmd-thread.  As direct benefit, datapath
and flow statistics no longer need to be protected by mutex
or be declared as per-thread variable, since they are only
written by the owning pmd thread.

As side effects, the flow-dump output of userspace datapath
can contain overlapping flows.  To reduce confusion, the dump
from different pmd thread will be separated by a title line.
In addition, the flow operations via 'ovs-appctl dpctl/*'
are modified so that if the given flow in_port corresponds
to a dpdk interface, the operation will be conducted to all
pmd threads recv from that interface (expect for flow-get
which will always be applied to non-pmd threads).

Signed-off-by: Alex Wang <alexw@nicira.com>
Tested-by: Mark D. Gray <mark.d.gray@intel.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>

											
										
										
											2014-10-12 18:18:47 -07:00
+								 * the 'type' of the upcall, and if 'type' is DPIF_UC_ACTION then the
 								 * 'userdata' attached to the action.
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								 *
 								 * The callback must fill in 'actions' with the datapath actions to apply to
 								 * 'packet'.  'wc' and 'put_actions' will either be both null or both nonnull.
 								 * If they are nonnull, then the caller will install a flow entry to process
 								 * all future packets that match 'flow' and 'wc'; the callback must store a
 								 * wildcard mask suitable for that purpose into 'wc'.  If the actions to store
 								 * into the flow entry are the same as 'actions', then the callback may leave
 								 * 'put_actions' empty; otherwise it must store the desired actions into
 								 * 'put_actions'.
 								 *
 								 * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
 								 * flow should be installed, or some otherwise a positive errno value. */
-												dp-packet: Remove ofpbuf dependency.

Currently dp-packet make use of ofpbuf for managing packet
buffers. That complicates ofpbuf, by making dp-packet
independent of ofpbuf both libraries can be optimized for
their own use case.
This avoids mapping operation between ofpbuf and dp_packet
in datapath upcalls.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2015-02-22 03:21:09 -08:00
+								typedef int upcall_callback(const struct dp_packet *packet,
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								                            const struct flow *flow,
-												dpif: Generate flow_hash for revalidators in dpif.

This patch shifts the responsibility for determining the hash for a flow
from the revalidation logic down to the dpif layer. This assists in
handling backward-compatibility for revalidation with the upcoming
unique flow identifier "UFID" patches.

A 128-bit UFID was selected to minimize the likelihood of hash conflicts.
Handler threads will not install a flow that has an identical UFID as
another flow, to prevent misattribution of stats and to ensure that the
correct flow key cache is used for revalidation.

For datapaths that do not support UFID, which is currently all
datapaths, the dpif will generate the UFID and pass it up during upcall
and flow_dump. This is generated based on the datapath flow key.

Later patches will add support for datapaths to store and interpret this
UFID, in which case the dpif has a responsibility to pass it through
transparently.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-09-24 15:24:39 +12:00
+								                            ovs_u128 *ufid,
-												ovs-numa: Change 'core_id' to unsigned.

DPDK lcore_id is unsigned.  We need to support big values like
LCORE_ID_ANY (=UINT32_MAX).  Therefore I am changing the type everywhere
in OVS.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2015-05-22 17:14:19 +01:00
+								                            unsigned pmd_id,
-												dpif-netdev: Streamline miss handling.

This patch avoids the relatively inefficient miss handling processes
dictated by the dpif process, by calling into ofproto-dpif directly
through a callback.

Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-07-26 15:39:58 -07:00
+								                            enum dpif_upcall_type type,
 								                            const struct nlattr *userdata,
 								                            struct ofpbuf *actions,
 								                            struct flow_wildcards *wc,
 								                            struct ofpbuf *put_actions,
 								                            void *aux);
 								void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
-												dpif: Simplify the "listen mask" concept.

At one point in the past, there were three separate queues between the
kernel module and OVS userspace, each of which corresponded to a Netlink
socket (or, before that, to a character device).  It made sense to allow
each of these to be enabled or disabled separately, hence the "listen mask"
concept in the dpif layer.

These days, the concept is much less clear-cut.  Queuing is no longer on
the basis of different classes of packets but instead striped across a
collection of sockets based on input port.  It doesn't really make sense
to enable receiving packets on the basis of the kind of packet anymore.
Accordingly, this commit simplifies the "listen_mask" to just a bool that
either enables or disables receiving packets.

It could be useful to enable or disable receiving packets on a per-vport
basis, but the rest of the code isn't ready to make use of that so this
commit doesn't generalize this much.

Based on this discussion on ovs-dev:
http://openvswitch.org/pipermail/dev/2011-October/012044.html

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-01-12 17:09:22 -08:00
+								int dpif_recv_set(struct dpif *, bool enable);
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								int dpif_handlers_set(struct dpif *, uint32_t n_handlers);
-												dpif-netlink: Introduce per-cpu upcall dispatch.

The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is correspondingly
a large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray <mark.d.gray@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Aaron Conole <aconole@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-07-16 06:17:36 -04:00
+								bool dpif_number_handlers_required(struct dpif *, uint32_t *n_handlers);
-												dpif-netdev: Pass Openvswitch other_config smap to dpif.

Currently we parse the 'other_config' column in Openvswitch table in
bridge.c.  We extract the values (just 'pmd-cpu-mask' for now) and we
pass them down to the datapath, via different layers.

If we want to pass other values to dpif-netdev.c (like we recently
discussed) we would have to touch ofproto.c, ofproto-dpif.c and dpif.c.

This patch sends the entire other_config column to dpif-netdev, so that
dpif-netdev can extract the values it's interested in.

No functional change.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-01-27 16:41:36 -08:00
+								int dpif_set_config(struct dpif *, const struct smap *cfg);
-												bridge: Pass interface's configuration to datapath.

This commit adds functionality to pass value of 'other_config' column
of 'Interface' table to datapath.

This may be used to pass not directly connected with netdev options and
configure behaviour of the datapath for different ports.
For example: pinning of rx queues to polling threads in dpif-netdev.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
Acked-by: Daniele Di Proietto <diproiettod@vmware.com>

											
										
										
											2016-07-27 17:44:42 +03:00
+								int dpif_port_set_config(struct dpif *, odp_port_t, const struct smap *cfg);
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *,
 								              struct ofpbuf *);
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink socket as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								void dpif_recv_purge(struct dpif *);
-												dpif: Change dpif API to allow multiple handler threads read upcall.

This commit changes the API in 'dpif-provider.h' to allow multiple
handler threads call dpif_recv() simultaneously.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-03-07 10:57:36 -08:00
+								void dpif_recv_wait(struct dpif *, uint32_t handler_id);
-												dpif-netdev: Polling threads directly call ofproto upcall functions.

Typically, kernel datapath threads send upcalls to userspace where
handler threads process the upcalls. For TAP and DPDK devices, the
datapath threads operate in userspace, so there is no need for
separate handler threads.

This patch allows userspace datapath threads to directly call the
ofproto upcall functions, eliminating the need for handler threads
for datapaths of type 'netdev'.

Signed-off-by: Ryan Wilson <wryan@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2014-07-26 06:51:55 +00:00
+								void dpif_enable_upcall(struct dpif *);
 								void dpif_disable_upcall(struct dpif *);
 								void dpif_print_packet(struct dpif *, struct dpif_upcall *);
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
-												dpif: Meter framework.

Add DPIF-level infrastructure for meters.  Allow meter_set to modify
the meter configuration (e.g. set the burst size if unspecified).

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Andy Zhou <azhou@ovn.org>

											
										
										
											2017-02-23 11:27:54 -08:00
+								/* Meters. */
 								void dpif_meter_get_features(const struct dpif *,
 								                             struct ofputil_meter_features *);
-												dpif: Don't pass in '*meter_id' to meter_set commands.

The original intent of the API appears to be that the underlying DPIF
implementaion would choose a local meter id.  However, neither of the
existing datapath meter implementations (userspace or Linux) implemented
that; they expected a valid meter id to be passed in, otherwise they
returned an error.  This commit follows the existing implementations and
makes the API somewhat cleaner.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2018-08-07 19:51:26 -07:00
+								int dpif_meter_set(struct dpif *, ofproto_meter_id meter_id,
-												dpif: Meter framework.

Add DPIF-level infrastructure for meters.  Allow meter_set to modify
the meter configuration (e.g. set the burst size if unspecified).

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Andy Zhou <azhou@ovn.org>

											
										
										
											2017-02-23 11:27:54 -08:00
+								                   struct ofputil_meter_config *);
 								int dpif_meter_get(const struct dpif *, ofproto_meter_id meter_id,
 								                   struct ofputil_meter_stats *, uint16_t n_bands);
 								int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id,
 								                   struct ofputil_meter_stats *, uint16_t n_bands);
-												userspace: Avoid dp_hash recirculation for balance-tcp bond mode.

Problem:

In OVS, flows with output over a bond interface of type “balance-tcp”
gets translated by the ofproto layer into "HASH" and "RECIRC" datapath
actions. After recirculation, the packet is forwarded to the bond
member port based on 8-bits of the datapath hash value computed through
dp_hash. This causes performance degradation in the following ways:

1. The recirculation of the packet implies another lookup of the
packet’s flow key in the exact match cache (EMC) and potentially
Megaflow classifier (DPCLS). This is the biggest cost factor.

2. The recirculated packets have a new “RSS” hash and compete with the
original packets for the scarce number of EMC slots. This implies more
EMC misses and potentially EMC thrashing causing costly DPCLS lookups.

3. The 256 extra megaflow entries per bond for dp_hash bond selection
put additional load on the revalidation threads.

Owing to this performance degradation, deployments stick to “balance-slb”
bond mode even though it does not do active-active load balancing for
VXLAN- and GRE-tunnelled traffic because all tunnel packet have the
same source MAC address.

Proposed optimization:

This proposal introduces a new load-balancing output action instead of
recirculation.

Maintain one table per-bond (could just be an array of uint16's) and
program it the same way internal flows are created today for each
possible hash value (256 entries) from ofproto layer. Use this table to
load-balance flows as part of output action processing.

Currently xlate_normal() -> output_normal() ->
bond_update_post_recirc_rules() -> bond_may_recirc() and
compose_output_action__() generate 'dp_hash(hash_l4(0))' and
'recirc(<RecircID>)' actions. In this case the RecircID identifies the
bond. For the recirculated packets the ofproto layer installs megaflow
entries that match on RecircID and masked dp_hash and send them to the
corresponding output port.

Instead, we will now generate action as
    'lb_output(<bond id>)'

This combines hash computation (only if needed, else re-use RSS hash)
and inline load-balancing over the bond. This action is used *only* for
balance-tcp bonds in userspace datapath (the OVS kernel datapath
remains unchanged).

Example:
Current scheme:

With 8 UDP flows (with random UDP src port):

  flow-dump from pmd on cpu core: 2
  recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1)

  recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1

New scheme:
We can do with a single flow entry (for any number of new flows):

  in_port(7),<...> actions:lb_output(1)

A new CLI has been added to dump datapath bond cache as given below.

 # ovs-appctl dpif-netdev/bond-show [dp]

   Bond cache:
     bond-id 1 :
       bucket 0 - slave 2
       bucket 1 - slave 1
       bucket 2 - slave 2
       bucket 3 - slave 1

Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com>
Tested-by: Matteo Croce <mcroce@redhat.com>
Tested-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-05-22 10:50:05 +02:00
 								/* Bonding. */
 								/* Bit-mask for hashing a flow down to a bucket. */
 								#define BOND_MASK 0xff
 								#define BOND_BUCKETS (BOND_MASK + 1)
-												Eliminate use of term "slave" in bond, LACP, and bundle contexts.

The new term is "member".

Most of these changes should not change user-visible behavior.  One
place where they do is in "ovs-ofctl dump-flows", which will now output
"members:..." inside "bundle" actions instead of "slaves:...".  I don't
expect this to cause real problems in most systems.  The old syntax
is still supported on input for backward compatibility.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Alin Gabriel Serdean <aserdean@cloudbasesolutions.com>

											
										
										
											2020-06-17 14:16:08 -07:00
+								int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *member_map);
-												userspace: Avoid dp_hash recirculation for balance-tcp bond mode.

Problem:

In OVS, flows with output over a bond interface of type “balance-tcp”
gets translated by the ofproto layer into "HASH" and "RECIRC" datapath
actions. After recirculation, the packet is forwarded to the bond
member port based on 8-bits of the datapath hash value computed through
dp_hash. This causes performance degradation in the following ways:

1. The recirculation of the packet implies another lookup of the
packet’s flow key in the exact match cache (EMC) and potentially
Megaflow classifier (DPCLS). This is the biggest cost factor.

2. The recirculated packets have a new “RSS” hash and compete with the
original packets for the scarce number of EMC slots. This implies more
EMC misses and potentially EMC thrashing causing costly DPCLS lookups.

3. The 256 extra megaflow entries per bond for dp_hash bond selection
put additional load on the revalidation threads.

Owing to this performance degradation, deployments stick to “balance-slb”
bond mode even though it does not do active-active load balancing for
VXLAN- and GRE-tunnelled traffic because all tunnel packet have the
same source MAC address.

Proposed optimization:

This proposal introduces a new load-balancing output action instead of
recirculation.

Maintain one table per-bond (could just be an array of uint16's) and
program it the same way internal flows are created today for each
possible hash value (256 entries) from ofproto layer. Use this table to
load-balance flows as part of output action processing.

Currently xlate_normal() -> output_normal() ->
bond_update_post_recirc_rules() -> bond_may_recirc() and
compose_output_action__() generate 'dp_hash(hash_l4(0))' and
'recirc(<RecircID>)' actions. In this case the RecircID identifies the
bond. For the recirculated packets the ofproto layer installs megaflow
entries that match on RecircID and masked dp_hash and send them to the
corresponding output port.

Instead, we will now generate action as
    'lb_output(<bond id>)'

This combines hash computation (only if needed, else re-use RSS hash)
and inline load-balancing over the bond. This action is used *only* for
balance-tcp bonds in userspace datapath (the OVS kernel datapath
remains unchanged).

Example:
Current scheme:

With 8 UDP flows (with random UDP src port):

  flow-dump from pmd on cpu core: 2
  recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1)

  recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2
  recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1
  recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1

New scheme:
We can do with a single flow entry (for any number of new flows):

  in_port(7),<...> actions:lb_output(1)

A new CLI has been added to dump datapath bond cache as given below.

 # ovs-appctl dpif-netdev/bond-show [dp]

   Bond cache:
     bond-id 1 :
       bucket 0 - slave 2
       bucket 1 - slave 1
       bucket 2 - slave 2
       bucket 3 - slave 1

Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com>
Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com>
Tested-by: Matteo Croce <mcroce@redhat.com>
Tested-by: Adrian Moreno <amorenoz@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2020-05-22 10:50:05 +02:00
+								int dpif_bond_del(struct dpif *, uint32_t bond_id);
 								int dpif_bond_stats_get(struct dpif *, uint32_t bond_id, uint64_t *n_bytes);
 								bool dpif_supports_lb_output_action(const struct dpif *);
-												dpctl: dpif: Allow viewing and configuring dp cache sizes.

This patch adds a general way of viewing/configuring datapath
cache sizes. With an implementation for the netlink interface.

The ovs-dpctl/ovs-appctl show commands will display the
current cache sizes configured:

 $ ovs-dpctl show
 system@ovs-system:
   lookups: hit:25 missed:63 lost:0
   flows: 0
   masks: hit:282 total:0 hit/pkt:3.20
   cache: hit:4 hit-rate:4.54%
   caches:
     masks-cache: size:256
   port 0: ovs-system (internal)
   port 1: br-int (internal)
   port 2: genev_sys_6081 (geneve: packet_type=ptap)
   port 3: br-ex (internal)
   port 4: eth2
   port 5: sw0p1 (internal)
   port 6: sw0p3 (internal)

A specific cache can be configured as follows:

 $ ovs-appctl dpctl/cache-set-size DP CACHE SIZE
 $ ovs-dpctl cache-set-size DP CACHE SIZE

For example to disable the cache do:

 $ ovs-dpctl cache-set-size system@ovs-system masks-cache 0
 Setting cache size successful, new size 0.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Acked-by: Paolo Valerio <pvalerio@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2021-09-06 10:53:58 +02:00
 								/* Cache */
 								int dpif_cache_get_supported_levels(struct dpif *dpif, uint32_t *levels);
 								int dpif_cache_get_name(struct dpif *dpif, uint32_t level, const char **name);
 								int dpif_cache_get_size(struct dpif *dpif, uint32_t level, uint32_t *size);
 								int dpif_cache_set_size(struct dpif *dpif, uint32_t level, uint32_t size);
-												dpif: Meter framework.

Add DPIF-level infrastructure for meters.  Allow meter_set to modify
the meter configuration (e.g. set the burst size if unspecified).

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Signed-off-by: Andy Zhou <azhou@ovn.org>

											
										
										
											2017-02-23 11:27:54 -08:00
-												dpif: New function dpif_operate() and dpif-linux implementation.

This will be used in an upcoming commit.

											
										
										
											2011-09-27 15:08:50 -07:00
+								/* Miscellaneous. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												dpif: New function dpif_get_netflow_ids().

The 'minor' member of struct dpif is used for two different purposes:
for printing in log messages and for encapsulating in NetFlow messages.
The needs in each case are different, so we should break up these uses.
This commit does half of that, by introducing a new function to retrieve
NetFlow ids and using it where appropriate.

											
										
										
											2009-06-16 10:59:43 -07:00
+								void dpif_get_netflow_ids(const struct dpif *,
 								                          uint8_t *engine_type, uint8_t *engine_id);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												dpif: Abstract translation from OpenFlow queue ID into ODP priority value.

When the QoS code was integrated, I didn't yet know how to abstract the
translation from a queue ID in an OpenFlow OFPAT_ENQUEUE action into a
priority value for an ODP ODPAT_SET_PRIORITY action.  This commit is a
first attempt that works OK for Linux, so far.  It's possible that in fact
this translation needs the 'netdev' as an argument too, but it's not needed
yet.

											
										
										
											2010-07-20 11:23:21 -07:00
+								int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id,
 								                           uint32_t *priority);
-												dpctl: Avoid making assumptions on pmd threads.

Currently dpctl depends on ovs-numa module to delete and create flows on
different pmd threads for pmd devices.

The next commits will move away the pmd threads state from ovs-numa to
dpif-netdev, so the ovs-numa interface will not be supported.

Also, the assignment between ports and thread is an implementation
detail of dpif-netdev, dpctl shouldn't know anything about it.

This commit changes the dpif_flow_put() and dpif_flow_del() calls to
iterate over all the pmd threads, if pmd_id is PMD_ID_NULL.

A simple test is added.

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
Acked-by: Ilya Maximets <i.maximets@samsung.com>

											
										
										
											2016-11-15 15:40:49 -08:00
+								int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no,
 								                           unsigned int **pmds, size_t *n);
-												bridge: Store datapath version into ovsdb

OVS userspace are backward compatible with older Linux kernel modules.
However, not having the most up-to-date datapath kernel modules can
some times lead to user confusion. Storing the datapath version in
OVSDB allows management software to check and optionally provide
notifications to users.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-10-16 15:23:11 -07:00
+								char *dpif_get_dp_version(const struct dpif *);
-												openvswitch: Userspace tunneling.

Following patch adds support for userspace tunneling. Tunneling
needs three more component first is routing table which is configured by
caching kernel routes and second is ARP cache which build automatically
by snooping arp. And third is tunnel protocol table which list all
listening protocols which is populated by vswitchd as tunnel ports
are added. GRE and VXLAN protocol support is added in this patch.

Tunneling works as follows:
On packet receive vswitchd check if this packet is targeted to tunnel
port. If it is then vswitchd inserts tunnel pop action which pops
header and sends packet to tunnel port.
On packet xmit rather than generating Set tunnel action it generate
tunnel push action which has tunnel header data. datapath can use
tunnel-push action data to generate header for each packet and
forward this packet to output port. Since tunnel-push action
contains most of packet header vswitchd needs to lookup routing
table and arp table to build this action.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-11-11 11:53:47 -08:00
+								bool dpif_supports_tnl_push_pop(const struct dpif *);
-												dpif: Probe support for OVS_ACTION_ATTR_DROP.

Kernel support has been added for this action. As such, we need to probe
the datapath for support.

Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Eric Garver <eric@garver.life>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-04-03 10:35:31 -04:00
+								bool dpif_may_support_explicit_drop_action(const struct dpif *);
-												ofproto_dpif: Check for psample support.

Only kernel datapath supports this action so add a function in dpif.c
that checks for that.

Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2024-07-13 23:23:39 +02:00
+								bool dpif_may_support_psample(const struct dpif *);
-												ofproto-dpif-upcall: Reset ukey's last stats value if the datapath changed.

When the ukey's action set changes, it could cause the flow to use a
different datapath, for example, when it moves from tc to kernel.
This will cause the the cached previous datapath statistics to be used.

This change will reset the cached statistics when a change in
datapath is discovered.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-02-27 16:29:26 +01:00
+								bool dpif_synced_dp_layers(struct dpif *);
-												dpif: Refactor flow logging functions to be used by other modules

To be reused by other modules.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>

											
										
										
											2017-06-13 18:03:53 +03:00
 								/* Log functions. */
 								struct vlog_module;
 								void log_flow_message(const struct dpif *dpif, int error,
 								                      const struct vlog_module *module,
 								                      const char *operation,
 								                      const struct nlattr *key, size_t key_len,
 								                      const struct nlattr *mask, size_t mask_len,
 								                      const ovs_u128 *ufid,
 								                      const struct dpif_flow_stats *stats,
 								                      const struct nlattr *actions, size_t actions_len);
 								void log_flow_put_message(const struct dpif *,
 								                          const struct vlog_module *,
 								                          const struct dpif_flow_put *,
 								                          int error);
 								void log_flow_del_message(const struct dpif *,
 								                          const struct vlog_module *,
 								                          const struct dpif_flow_del *,
 								                          int error);
 								void log_execute_message(const struct dpif *,
 								                         const struct vlog_module *,
 								                         const struct dpif_execute *,
 								                         bool subexecute, int error);
 								void log_flow_get_message(const struct dpif *,
 								                          const struct vlog_module *,
 								                          const struct dpif_flow_get *,
 								                          int error);
-												Add extern "C" to more header files.

From partner.

											
										
										
											2010-02-17 10:36:57 -05:00
+								#ifdef  __cplusplus
 								}
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#endif /* dpif.h */