/* ovs/lib/dpif-linux.c */

/*
 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "dpif-linux.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <net/if.h>
#include <linux/types.h>
#include <linux/ethtool.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/sockios.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <unistd.h>

#include "dpif-provider.h"
#include "netdev.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "odp-util.h"
#include "ofpbuf.h"
#include "openvswitch/tunnel.h"
#include "packets.h"
#include "poll-loop.h"
#include "rtnetlink.h"
#include "rtnetlink-link.h"
#include "shash.h"
#include "svec.h"
#include "unaligned.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(dpif_linux);

/* Decoded form of a datapath request or reply, as passed to
 * dpif_linux_dp_transact().  Callers in this file initialize a request with
 * dpif_linux_dp_init() and then fill in only the members they need. */
struct dpif_linux_dp {
    /* ioctl command argument. */
    int cmd;

    /* struct odp_datapath header. */
    uint32_t dp_idx;

    /* Attributes.
     *
     * 'sampling' may be null; readers in this file check it before
     * dereferencing it. */
    const char *name;                  /* ODP_DP_ATTR_NAME. */
    struct odp_stats stats;            /* ODP_DP_ATTR_STATS. */
    enum odp_frag_handling ipv4_frags; /* ODP_DP_ATTR_IPV4_FRAGS. */
    const uint32_t *sampling;          /* ODP_DP_ATTR_SAMPLING. */
};

/* Helpers for datapath requests (defined later in the file).
 *
 * As used in this file, dpif_linux_dp_transact() is called either with both
 * 'reply' and 'bufp' non-null or with both null.  When a reply is requested
 * and the call returns 0, the caller releases '*bufp' with ofpbuf_delete()
 * once it is done with 'reply'. */
static void dpif_linux_dp_init(struct dpif_linux_dp *);
static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
                                  struct dpif_linux_dp *reply,
                                  struct ofpbuf **bufp);
static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
                             struct ofpbuf **bufp);

/* Decoded form of a flow request or reply, as passed to
 * dpif_linux_flow_transact().  Callers in this file initialize a request with
 * dpif_linux_flow_init() and then fill in only the members they need. */
struct dpif_linux_flow {
    /* ioctl command argument. */
    int cmd;

    /* struct odp_flow header. */
    unsigned int nlmsg_flags;
    uint32_t dp_idx;

    /* Attributes.
     *
     * The 'stats', 'used', and 'state' members point to 64-bit data that might
     * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be
     * used to access their values. */
    const struct nlattr *key;           /* ODP_FLOW_ATTR_KEY. */
    size_t key_len;
    const struct nlattr *actions;       /* ODP_FLOW_ATTR_ACTIONS. */
    size_t actions_len;
    const struct odp_flow_stats *stats; /* ODP_FLOW_ATTR_STATS. */
    const uint8_t *tcp_flags;           /* ODP_FLOW_ATTR_TCP_FLAGS. */
    const uint64_t *used;               /* ODP_FLOW_ATTR_USED. */
    bool clear;                         /* ODP_FLOW_ATTR_CLEAR. */
    const uint64_t *state;              /* ODP_FLOW_ATTR_STATE. */
};

/* Helpers for flow requests (defined later in the file).
 *
 * As used in this file, dpif_linux_flow_transact() is called either with both
 * 'reply' and 'bufp' non-null or with both null.  When a reply is requested
 * and the call returns 0, the caller owns '*bufp' and eventually releases it
 * with ofpbuf_delete(). */
static void dpif_linux_flow_init(struct dpif_linux_flow *);
static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
                                    struct dpif_linux_flow *reply,
                                    struct ofpbuf **bufp);
static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
                                      struct dpif_flow_stats *);

/* Datapath interface for the openvswitch Linux kernel module.
 *
 * Instances are created by open_dpif() and released by dpif_linux_close(). */
struct dpif_linux {
    struct dpif dpif;           /* Generic part; see dpif_linux_cast(). */
    int fd;                     /* Descriptor obtained from open_minor(). */

    /* Used by dpif_linux_get_all_names(). */
    char *local_ifname;         /* Malloc'd copy of the local port's name. */
    int minor;                  /* Datapath index, also used as 'dp_idx'. */

    /* Change notification. */
    int local_ifindex;          /* Ifindex of local port. */
    struct shash changed_ports;  /* Ports that have changed. */
    struct rtnetlink_notifier port_notifier;
    bool change_error;          /* Set when the notifier callback is invoked
                                 * with a null change; reported by
                                 * dpif_linux_port_poll() as ENOBUFS. */
};

/* Rate limiter shared by the VLOG_WARN_RL() calls in this file. */
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);

/* Forward declarations for helpers defined further down in this file. */
static int do_ioctl(const struct dpif *, int cmd, const void *arg);
static int open_dpif(const struct dpif_linux_vport *local_vport,
                     struct dpif **);
static int get_openvswitch_major(void);
static int open_minor(int minor, int *fdp);
static int make_openvswitch_device(int minor, char **fnp);
static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
                                    void *dpif);

/* Returns the struct dpif_linux that embeds 'dpif', after asserting that
 * 'dpif' belongs to dpif_linux_class. */
static struct dpif_linux *
dpif_linux_cast(const struct dpif *dpif)
{
    struct dpif_linux *impl;

    dpif_assert_class(dpif, &dpif_linux_class);
    impl = CONTAINER_OF(dpif, struct dpif_linux, dpif);
    return impl;
}

/* Appends the name ("dp<N>") of every existing datapath to 'all_dps'.
 *
 * Returns 0 on success (including when there are no datapaths), otherwise a
 * positive errno value.  Fails early if the Open vSwitch major device number
 * cannot be determined, i.e. the kernel module does not appear to be
 * loaded. */
static int
dpif_linux_enumerate(struct svec *all_dps)
{
    uint32_t dp_idx;
    int major;

    /* Check that the Open vSwitch module is loaded. */
    major = get_openvswitch_major();
    if (major < 0) {
        return -major;
    }

    dp_idx = 0;
    for (;;) {
        struct dpif_linux_dp request, reply;
        struct ofpbuf *buf;
        char devname[16];
        int error;

        /* Ask for the first datapath whose index is at least 'dp_idx'. */
        dpif_linux_dp_init(&request);
        request.dp_idx = dp_idx;
        request.cmd = ODP_DP_DUMP;

        error = dpif_linux_dp_transact(&request, &reply, &buf);
        if (error) {
            /* ENODEV marks the end of the dump, not a failure. */
            return error == ENODEV ? 0 : error;
        }
        ofpbuf_delete(buf);

        /* 'dp_idx' is unsigned, so format it as such; "dp" plus at most 10
         * digits plus the terminator fits in 'devname'. */
        snprintf(devname, sizeof devname, "dp%"PRIu32, reply.dp_idx);
        svec_add(all_dps, devname);

        dp_idx = reply.dp_idx + 1;
    }
}

/* Opens (and, if 'create' is true, first creates) the datapath called 'name'
 * and stores the new dpif in '*dpifp'.
 *
 * 'name' is either "dp<N>" or the name of a datapath's local port.  Naming
 * any other Open vSwitch port yields EOPNOTSUPP.  Returns 0 on success,
 * otherwise a positive errno value. */
static int
dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
                bool create, struct dpif **dpifp)
{
    struct dpif_linux_vport vport_request, vport_reply;
    struct ofpbuf *vport_buf;
    int minor = -1;
    int error;

    /* A name of the form "dp<digit>..." selects the datapath by index. */
    if (!strncmp(name, "dp", 2) && isdigit((unsigned char)name[2])) {
        minor = atoi(name + 2);
    }

    if (create) {
        struct dpif_linux_dp dp_request, dp_reply;
        struct ofpbuf *dp_buf;

        dpif_linux_dp_init(&dp_request);
        dp_request.cmd = ODP_DP_NEW;
        dp_request.dp_idx = minor;
        dp_request.name = name;
        error = dpif_linux_dp_transact(&dp_request, &dp_reply, &dp_buf);
        if (error) {
            return error;
        }
        /* From here on use the index reported back for the new datapath. */
        minor = dp_reply.dp_idx;
        ofpbuf_delete(dp_buf);
    }

    /* Look up the local port, by datapath index if known, else by name. */
    dpif_linux_vport_init(&vport_request);
    vport_request.cmd = ODP_VPORT_GET;
    vport_request.port_no = ODPP_LOCAL;
    if (minor < 0) {
        vport_request.name = name;
    } else {
        vport_request.dp_idx = minor;
    }

    error = dpif_linux_vport_transact(&vport_request, &vport_reply,
                                      &vport_buf);
    if (error) {
        return error;
    }

    if (vport_reply.port_no == ODPP_LOCAL) {
        error = open_dpif(&vport_reply, dpifp);
    } else {
        /* This is an Open vSwitch device but not the local port.  We
         * intentionally support only using the name of the local port as the
         * name of a datapath; otherwise, it would be too difficult to
         * enumerate all the names of a datapath. */
        error = EOPNOTSUPP;
    }

    ofpbuf_delete(vport_buf);
    return error;
}

static int
open_dpif(const struct dpif_linux_vport *local_vport, struct dpif **dpifp)
{
    int dp_idx = local_vport->dp_idx;
    struct dpif_linux *dpif;
    char *name;
    int error;
    int fd;

    error = open_minor(dp_idx, &fd);
    if (error) {
        goto error;
    }

    dpif = xmalloc(sizeof *dpif);
    error = rtnetlink_link_notifier_register(&dpif->port_notifier,
                                             dpif_linux_port_changed, dpif);
    if (error) {
        goto error_free;
    }

    name = xasprintf("dp%d", dp_idx);
    dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx);
    free(name);

    dpif->fd = fd;
    dpif->local_ifname = xstrdup(local_vport->name);
    dpif->local_ifindex = local_vport->ifindex;
    dpif->minor = dp_idx;
    shash_init(&dpif->changed_ports);
    dpif->change_error = false;
    *dpifp = &dpif->dpif;

    return 0;

error_free:
    free(dpif);
    close(fd);
error:
    return error;
}

/* Releases everything open_dpif() set up for 'dpif_': the link notifier, the
 * set of changed ports, the local port name, the device descriptor, and the
 * structure itself. */
static void
dpif_linux_close(struct dpif *dpif_)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);

    rtnetlink_link_notifier_unregister(&dp->port_notifier);
    shash_destroy(&dp->changed_ports);

    free(dp->local_ifname);
    close(dp->fd);

    free(dp);
}

/* Adds both names by which 'dpif_' can be opened to 'all_names': "dp<minor>"
 * and the name of its local port.  Always returns 0. */
static int
dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);
    char *indexed_name = xasprintf("dp%d", dp->minor);

    /* 'all_names' takes ownership of 'indexed_name'. */
    svec_add_nocopy(all_names, indexed_name);
    svec_add(all_names, dp->local_ifname);
    return 0;
}

static int
dpif_linux_destroy(struct dpif *dpif_)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_dp dp;

    dpif_linux_dp_init(&dp);
    dp.cmd = ODP_DP_DEL;
    dp.dp_idx = dpif->minor;
    return dpif_linux_dp_transact(&dp, NULL, NULL);
}

static int
dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
{
    struct dpif_linux_dp dp;
    struct ofpbuf *buf;
    int error;

    error = dpif_linux_dp_get(dpif_, &dp, &buf);
    if (!error) {
        *stats = dp.stats;
        ofpbuf_delete(buf);
    }
    return error;
}

static int
dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
{
    struct dpif_linux_dp dp;
    struct ofpbuf *buf;
    int error;

    error = dpif_linux_dp_get(dpif_, &dp, &buf);
    if (!error) {
        *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
        ofpbuf_delete(buf);
    }
    return error;
}

static int
dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_dp dp;

    dpif_linux_dp_init(&dp);
    dp.cmd = ODP_DP_SET;
    dp.dp_idx = dpif->minor;
    dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
    return dpif_linux_dp_transact(&dp, NULL, NULL);
}

static int
dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
                    uint16_t *port_nop)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    const char *name = netdev_get_name(netdev);
    const char *type = netdev_get_type(netdev);
    struct dpif_linux_vport request, reply;
    const struct ofpbuf *options;
    struct ofpbuf *buf;
    int error;

    dpif_linux_vport_init(&request);
    request.cmd = ODP_VPORT_NEW;
    request.dp_idx = dpif->minor;
    request.type = netdev_vport_get_vport_type(netdev);
    if (request.type == ODP_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(dpif_), name, type);
        return EINVAL;
    }
    request.name = name;

    options = netdev_vport_get_options(netdev);
    if (options && options->size) {
        request.options = options->data;
        request.options_len = options->size;
    }

    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (!error) {
        *port_nop = reply.port_no;
        ofpbuf_delete(buf);
    }

    return error;
}

static int
dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_vport vport;

    dpif_linux_vport_init(&vport);
    vport.cmd = ODP_VPORT_DEL;
    vport.dp_idx = dpif->minor;
    vport.port_no = port_no;
    return dpif_linux_vport_transact(&vport, NULL, NULL);
}

static int
dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no,
                        const char *port_name, struct dpif_port *dpif_port)
{
    struct dpif_linux_vport request;
    struct dpif_linux_vport reply;
    struct ofpbuf *buf;
    int error;

    dpif_linux_vport_init(&request);
    request.cmd = ODP_VPORT_GET;
    request.dp_idx = dpif_linux_cast(dpif)->minor;
    request.port_no = port_no;
    request.name = port_name;

    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (!error) {
        dpif_port->name = xstrdup(reply.name);
        dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply));
        dpif_port->port_no = reply.port_no;
        ofpbuf_delete(buf);
    }
    return error;
}

/* Looks up the port numbered 'port_no' on 'dpif' and describes it in
 * 'dpif_port'.  Returns 0 on success, otherwise a positive errno value. */
static int
dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
                                struct dpif_port *dpif_port)
{
    const char *no_name = NULL;

    return dpif_linux_port_query__(dpif, port_no, no_name, dpif_port);
}

/* Looks up the port named 'devname' on 'dpif' and describes it in
 * 'dpif_port'.  The request carries port number 0 alongside the name.
 * Returns 0 on success, otherwise a positive errno value. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    const uint32_t unspecified_port_no = 0;

    return dpif_linux_port_query__(dpif, unspecified_port_no, devname,
                                   dpif_port);
}

/* Returns the number of ports a datapath supports, currently a fixed 1024. */
static int
dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED)
{
    /* If the datapath increases its range of supported ports, then it should
     * start reporting that. */
    const int max_ports = 1024;

    return max_ports;
}

/* Issues ODP_FLOW_FLUSH for the datapath behind 'dpif_'.  Returns 0 on
 * success, otherwise the positive errno value left by ioctl(). */
static int
dpif_linux_flow_flush(struct dpif *dpif_)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);

    if (ioctl(dp->fd, ODP_FLOW_FLUSH, dp->minor)) {
        return errno;
    }
    return 0;
}

/* Iteration state for the port dump functions that follow. */
struct dpif_linux_port_state {
    struct ofpbuf *buf;         /* Reply buffer from the latest dump step, or
                                 * null; kept until the next step or the end
                                 * of the dump. */
    uint32_t next;              /* Port number to request on the next step. */
};

/* Begins a port dump by storing a zero-filled dump state in '*statep'.
 * Always returns 0. */
static int
dpif_linux_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    struct dpif_linux_port_state *state = xzalloc(sizeof *state);

    *statep = state;
    return 0;
}

static int
dpif_linux_port_dump_next(const struct dpif *dpif, void *state_,
                          struct dpif_port *dpif_port)
{
    struct dpif_linux_port_state *state = state_;
    struct dpif_linux_vport request, reply;
    struct ofpbuf *buf;
    int error;

    ofpbuf_delete(state->buf);
    state->buf = NULL;

    dpif_linux_vport_init(&request);
    request.cmd = ODP_VPORT_DUMP;
    request.dp_idx = dpif_linux_cast(dpif)->minor;
    request.port_no = state->next;
    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (error) {
        return error == ENODEV ? EOF : error;
    } else {
        dpif_port->name = (char *) reply.name;
        dpif_port->type = (char *) netdev_vport_get_netdev_type(&reply);
        dpif_port->port_no = reply.port_no;
        state->buf = buf;
        state->next = reply.port_no + 1;
        return 0;
    }
}

/* Ends a port dump: releases the reply buffer still held in 'state_', if
 * any, and then the state itself.  Always returns 0. */
static int
dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dpif_linux_port_state *dump = state_;

    ofpbuf_delete(dump->buf);
    free(dump);

    return 0;
}

/* Reports one pending port change on 'dpif_'.
 *
 * Returns ENOBUFS (after discarding all recorded changes) if a notification
 * error was flagged, 0 with '*devnamep' set to a value taken out of the
 * changed-ports set if a change is pending, or EAGAIN if nothing is
 * pending. */
static int
dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);
    struct shash_node *first;

    if (dp->change_error) {
        dp->change_error = false;
        shash_clear(&dp->changed_ports);
        return ENOBUFS;
    }

    if (shash_is_empty(&dp->changed_ports)) {
        return EAGAIN;
    }

    first = shash_first(&dp->changed_ports);
    *devnamep = shash_steal(&dp->changed_ports, first);
    return 0;
}

/* Arranges for the poll loop to wake up when dpif_linux_port_poll() would
 * have something to report for 'dpif_': immediately if a change or an error
 * is already recorded, otherwise when the link notifier has activity. */
static void
dpif_linux_port_poll_wait(const struct dpif *dpif_)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);
    bool pending = !shash_is_empty(&dp->changed_ports) || dp->change_error;

    if (!pending) {
        rtnetlink_link_notifier_wait();
        return;
    }
    poll_immediate_wake();
}

static int
dpif_linux_flow_get(const struct dpif *dpif_,
                    const struct nlattr *key, size_t key_len,
                    struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_flow request, reply;
    struct ofpbuf *buf;
    int error;

    dpif_linux_flow_init(&request);
    request.cmd = ODP_FLOW_GET;
    request.dp_idx = dpif->minor;
    request.key = key;
    request.key_len = key_len;
    error = dpif_linux_flow_transact(&request, &reply, &buf);
    if (!error) {
        if (stats) {
            dpif_linux_flow_get_stats(&reply, stats);
        }
        if (actionsp) {
            buf->data = (void *) reply.actions;
            buf->size = reply.actions_len;
            *actionsp = buf;
        } else {
            ofpbuf_delete(buf);
        }
    }
    return error;
}

static int
dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
                    const struct nlattr *key, size_t key_len,
                    const struct nlattr *actions, size_t actions_len,
                    struct dpif_flow_stats *stats)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_flow request, reply;
    struct ofpbuf *buf;
    int error;

    dpif_linux_flow_init(&request);
    request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET;
    request.dp_idx = dpif->minor;
    request.key = key;
    request.key_len = key_len;
    request.actions = actions;
    request.actions_len = actions_len;
    if (flags & DPIF_FP_ZERO_STATS) {
        request.clear = true;
    }
    request.nlmsg_flags = flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
    error = dpif_linux_flow_transact(&request,
                                     stats ? &reply : NULL,
                                     stats ? &buf : NULL);
    if (!error && stats) {
        dpif_linux_flow_get_stats(&reply, stats);
        ofpbuf_delete(buf);
    }
    return error;
}

static int
dpif_linux_flow_del(struct dpif *dpif_,
                    const struct nlattr *key, size_t key_len,
                    struct dpif_flow_stats *stats)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_flow request, reply;
    struct ofpbuf *buf;
    int error;

    dpif_linux_flow_init(&request);
    request.cmd = ODP_FLOW_DEL;
    request.dp_idx = dpif->minor;
    request.key = key;
    request.key_len = key_len;
    error = dpif_linux_flow_transact(&request,
                                     stats ? &reply : NULL,
                                     stats ? &buf : NULL);
    if (!error && stats) {
        dpif_linux_flow_get_stats(&reply, stats);
        ofpbuf_delete(buf);
    }
    return error;
}


/* Iteration state for the flow dump functions that follow. */
struct dpif_linux_flow_state {
    struct dpif_linux_flow flow;    /* Most recent reply; its 'state' member
                                     * is fed into the next request. */
    struct ofpbuf *buf;             /* Buffer returned with 'flow', or null. */
    struct dpif_flow_stats stats;   /* Storage for the statistics handed out
                                     * by dpif_linux_flow_dump_next(). */
};

/* Begins a flow dump by storing a zero-filled dump state in '*statep'.
 * Always returns 0. */
static int
dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    struct dpif_linux_flow_state *state = xzalloc(sizeof *state);

    *statep = state;
    return 0;
}

/* Advances the flow dump in 'state_' by one flow.
 *
 * On success returns 0 and, for each of 'key', 'actions' and 'stats' that is
 * nonnull, stores a pointer to the corresponding part of the reply ('key_len'
 * and 'actions_len' are written together with 'key' and 'actions').  The
 * pointers refer to storage held in the dump state, so they are only good
 * until the next call on the same state.
 *
 * Returns EOF when the transaction reports ENODEV, otherwise a positive
 * errno value. */
static int
dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
                          const struct nlattr **key, size_t *key_len,
                          const struct nlattr **actions, size_t *actions_len,
                          const struct dpif_flow_stats **stats)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_flow_state *state = state_;
    struct ofpbuf *old_buf = state->buf;
    struct dpif_linux_flow request;
    int error;

    dpif_linux_flow_init(&request);
    request.cmd = ODP_FLOW_DUMP;
    request.dp_idx = dpif->minor;
    /* Resume from where the previous reply left off.  This pointer presumably
     * refers to data inside 'old_buf', which is why 'old_buf' is only deleted
     * after the transaction below has used the request. */
    request.state = state->flow.state;
    /* NOTE(review): on failure this relies on the transaction leaving
     * 'state->buf' either null or valid, since dpif_linux_flow_dump_done()
     * deletes it later -- confirm against dpif_linux_flow_transact(). */
    error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
    ofpbuf_delete(old_buf);

    if (!error) {
        if (key) {
            *key = state->flow.key;
            *key_len = state->flow.key_len;
        }
        if (actions) {
            *actions = state->flow.actions;
            *actions_len = state->flow.actions_len;
        }
        if (stats) {
            /* Convert into the dump state's own storage so that the pointer
             * handed out stays valid after this function returns. */
            dpif_linux_flow_get_stats(&state->flow, &state->stats);
            *stats = &state->stats;
        }
    }
    return error == ENODEV ? EOF : error;
}

/* Ends a flow dump: releases the reply buffer still held in 'state_', if
 * any, and then the state itself.  Always returns 0. */
static int
dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dpif_linux_flow_state *dump = state_;

    ofpbuf_delete(dump->buf);
    free(dump);

    return 0;
}

static int
dpif_linux_execute(struct dpif *dpif_,
                   const struct nlattr *actions, size_t actions_len,
                   const struct ofpbuf *buf)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct odp_execute execute;

    memset(&execute, 0, sizeof execute);
    execute.dp_idx = dpif->minor;
    execute.actions = (struct nlattr *) actions;
    execute.actions_len = actions_len;
    execute.data = buf->data;
    execute.length = buf->size;
    return do_ioctl(dpif_, ODP_EXECUTE, &execute);
}

static int
dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask)
{
    return do_ioctl(dpif_, ODP_GET_LISTEN_MASK, listen_mask);
}

static int
dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
{
    return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask);
}

static int
dpif_linux_get_sflow_probability(const struct dpif *dpif_,
                                 uint32_t *probability)
{
    struct dpif_linux_dp dp;
    struct ofpbuf *buf;
    int error;

    error = dpif_linux_dp_get(dpif_, &dp, &buf);
    if (!error) {
        *probability = dp.sampling ? *dp.sampling : 0;
        ofpbuf_delete(buf);
    }
    return error;
}

static int
dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_dp dp;

    dpif_linux_dp_init(&dp);
    dp.cmd = ODP_DP_SET;
    dp.dp_idx = dpif->minor;
    dp.sampling = &probability;
    return dpif_linux_dp_transact(&dp, NULL, NULL);
}

/* Translates 'queue_id' into a priority value: TC_H_MAKE() with major part
 * 1 << 16 and minor part 'queue_id' + 1.
 *
 * Returns 0 and stores the result in '*priority' if 'queue_id' is below
 * 0xf000, otherwise returns EINVAL and leaves '*priority' untouched. */
static int
dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
                             uint32_t queue_id, uint32_t *priority)
{
    if (queue_id >= 0xf000) {
        return EINVAL;
    }

    *priority = TC_H_MAKE(1 << 16, queue_id + 1);
    return 0;
}

/* Decodes 'buf', a "struct odp_packet" followed by Netlink attributes, into
 * 'upcall'.
 *
 * On success returns 0.  'upcall->packet' is then 'buf' itself, with its
 * data and size narrowed to the packet attribute, and the key and actions
 * pointers refer to attribute payloads that were parsed out of 'buf'.
 * Optional attributes that are absent leave their members zero.  Returns
 * EINVAL, without touching 'upcall', if the attributes do not satisfy the
 * policy. */
static int
parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall)
{
    static const struct nl_policy odp_packet_policy[] = {
        /* Always present. */
        [ODP_PACKET_ATTR_TYPE] = { .type = NL_A_U32 },
        [ODP_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
                                     .min_len = ETH_HEADER_LEN },
        [ODP_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },

        /* _ODPL_ACTION_NR only. */
        [ODP_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },

        /* _ODPL_SFLOW_NR only. */
        [ODP_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true },
        [ODP_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
    };

    struct nlattr *attrs[ARRAY_SIZE(odp_packet_policy)];
    struct nlattr *packet_attr, *key_attr, *actions_attr;

    if (!nl_policy_parse(buf, sizeof(struct odp_packet), odp_packet_policy,
                         attrs, ARRAY_SIZE(odp_packet_policy))) {
        return EINVAL;
    }
    packet_attr = attrs[ODP_PACKET_ATTR_PACKET];
    key_attr = attrs[ODP_PACKET_ATTR_KEY];
    actions_attr = attrs[ODP_PACKET_ATTR_ACTIONS];

    /* Everything not explicitly set below stays zero. */
    memset(upcall, 0, sizeof *upcall);

    /* Mandatory attributes. */
    upcall->type = nl_attr_get_u32(attrs[ODP_PACKET_ATTR_TYPE]);
    upcall->packet = buf;
    buf->data = (void *) nl_attr_get(packet_attr);
    buf->size = nl_attr_get_size(packet_attr);
    upcall->key = (void *) nl_attr_get(key_attr);
    upcall->key_len = nl_attr_get_size(key_attr);

    /* Optional attributes. */
    if (attrs[ODP_PACKET_ATTR_USERDATA]) {
        upcall->userdata = nl_attr_get_u64(attrs[ODP_PACKET_ATTR_USERDATA]);
    }
    if (attrs[ODP_PACKET_ATTR_SAMPLE_POOL]) {
        upcall->sample_pool
            = nl_attr_get_u32(attrs[ODP_PACKET_ATTR_SAMPLE_POOL]);
    }
    if (actions_attr) {
        upcall->actions = (void *) nl_attr_get(actions_attr);
        upcall->actions_len = nl_attr_get_size(actions_attr);
    }

    return 0;
}

/* Reads one message from the descriptor of 'dpif_' and decodes it into
 * 'upcall'.
 *
 * Returns 0 on success, in which case the freshly allocated buffer has been
 * handed to parse_odp_packet().  Otherwise the buffer is freed and a
 * positive errno value is returned: the read error itself (EAGAIN is not
 * logged), EPROTO on end of file, ERANGE for a message that is shorter than
 * its header or than its own stated length, or the parse error. */
static int
dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);
    struct ofpbuf *buf = ofpbuf_new(65536);
    int n_read;
    int error;

    n_read = read(dp->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
    if (n_read < 0) {
        error = errno;
        if (error != EAGAIN) {
            VLOG_WARN_RL(&error_rl, "%s: read failed: %s",
                         dpif_name(dpif_), strerror(error));
        }
    } else if (!n_read) {
        VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_));
        error = EPROTO;
    } else if (n_read < sizeof(struct odp_packet)) {
        VLOG_WARN_RL(&error_rl, "%s: discarding too-short message (%d bytes)",
                     dpif_name(dpif_), n_read);
        error = ERANGE;
    } else {
        struct odp_packet *header = buf->data;

        buf->size += n_read;
        if (header->len > n_read) {
            VLOG_WARN_RL(&error_rl, "%s: discarding message truncated "
                         "from %"PRIu32" bytes to %d",
                         dpif_name(dpif_), header->len, n_read);
            error = ERANGE;
        } else {
            error = parse_odp_packet(buf, upcall);
        }
    }

    if (error) {
        ofpbuf_delete(buf);
    }
    return error;
}

static void
dpif_linux_recv_wait(struct dpif *dpif_)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    poll_fd_wait(dpif->fd, POLLIN);
}

/* Discards messages queued on the descriptor of 'dpif_' by reading one byte
 * at a time, up to DP_N_QUEUES * DP_MAX_QUEUE_LEN times, stopping early as
 * soon as a read does not return exactly one byte. */
static void
dpif_linux_recv_purge(struct dpif *dpif_)
{
    struct dpif_linux *dp = dpif_linux_cast(dpif_);
    int remaining;

    /* This is somewhat bogus because it assumes that the following macros have
     * fixed values, but it's going to go away later.  */
#define DP_N_QUEUES 3
#define DP_MAX_QUEUE_LEN 100
    remaining = DP_N_QUEUES * DP_MAX_QUEUE_LEN;
    while (remaining > 0) {
        /* Reading even 1 byte discards a whole datagram and saves time. */
        char scratch;

        if (read(dp->fd, &scratch, 1) != 1) {
            break;
        }
        remaining--;
    }
}

/* The "system" datapath class, implemented by the functions in this file.
 *
 * The initializer is positional, so the order of the entries has to match the
 * member order of struct dpif_class (declared outside this file).  The two
 * null entries after the name are presumably optional callbacks that this
 * provider does not implement -- confirm against struct dpif_class. */
const struct dpif_class dpif_linux_class = {
    "system",
    NULL,
    NULL,
    dpif_linux_enumerate,
    dpif_linux_open,
    dpif_linux_close,
    dpif_linux_get_all_names,
    dpif_linux_destroy,
    dpif_linux_get_stats,
    dpif_linux_get_drop_frags,
    dpif_linux_set_drop_frags,
    dpif_linux_port_add,
    dpif_linux_port_del,
    dpif_linux_port_query_by_number,
    dpif_linux_port_query_by_name,
    dpif_linux_get_max_ports,
    dpif_linux_port_dump_start,
    dpif_linux_port_dump_next,
    dpif_linux_port_dump_done,
    dpif_linux_port_poll,
    dpif_linux_port_poll_wait,
    dpif_linux_flow_get,
    dpif_linux_flow_put,
    dpif_linux_flow_del,
    dpif_linux_flow_flush,
    dpif_linux_flow_dump_start,
    dpif_linux_flow_dump_next,
    dpif_linux_flow_dump_done,
    dpif_linux_execute,
    dpif_linux_recv_get_mask,
    dpif_linux_recv_set_mask,
    dpif_linux_get_sflow_probability,
    dpif_linux_set_sflow_probability,
    dpif_linux_queue_to_priority,
    dpif_linux_recv,
    dpif_linux_recv_wait,
    dpif_linux_recv_purge,
};

/* Forward declarations for the device-number helpers defined below.
 * get_openvswitch_major() was already declared near the top of the file, so
 * repeating it here is redundant but harmless. */
static int get_openvswitch_major(void);
static int get_major(const char *target);

static int
do_ioctl(const struct dpif *dpif_, int cmd, const void *arg)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    return ioctl(dpif->fd, cmd, arg) ? errno : 0;
}

/* Returns true if 'name' names a vport whose type is
 * ODP_VPORT_TYPE_INTERNAL, false otherwise.
 *
 * A failed lookup yields false.  Failures other than ENODEV are logged with
 * a rate-limited warning; ENODEV is not logged. */
bool
dpif_linux_is_internal_device(const char *name)
{
    struct dpif_linux_vport reply;
    struct ofpbuf *buf;
    bool is_internal;
    int error;

    error = dpif_linux_vport_get(name, &reply, &buf);
    if (error) {
        if (error != ENODEV) {
            VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
                         name, strerror(error));
        }
        /* Do not look at 'reply' here: nothing visible in this function
         * guarantees that a failed lookup has filled it in. */
        return false;
    }

    /* Take the answer out of the reply before releasing its buffer. */
    is_internal = reply.type == ODP_VPORT_TYPE_INTERNAL;
    ofpbuf_delete(buf);
    return is_internal;
}

static int
make_openvswitch_device(int minor, char **fnp)
{
    const char dirname[] = "/dev/net";
    int major;
    dev_t dev;
    struct stat s;
    char fn[128];

    *fnp = NULL;

    major = get_openvswitch_major();
    if (major < 0) {
        return -major;
    }
    dev = makedev(major, minor);

    sprintf(fn, "%s/dp%d", dirname, minor);
    if (!stat(fn, &s)) {
        if (!S_ISCHR(s.st_mode)) {
            VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
                         fn);
        } else if (s.st_rdev != dev) {
            VLOG_WARN_RL(&error_rl,
                         "%s is device %u:%u but should be %u:%u, fixing",
                         fn, major(s.st_rdev), minor(s.st_rdev),
                         major(dev), minor(dev));
        } else {
            goto success;
        }
        if (unlink(fn)) {
            VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
                         fn, strerror(errno));
            return errno;
        }
    } else if (errno == ENOENT) {
        if (stat(dirname, &s)) {
            if (errno == ENOENT) {
                if (mkdir(dirname, 0755)) {
                    VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
                                 dirname, strerror(errno));
                    return errno;
                }
            } else {
                VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
                             dirname, strerror(errno));
                return errno;
            }
        }
    } else {
        VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
        return errno;
    }

    /* The device needs to be created. */
    if (mknod(fn, S_IFCHR | 0700, dev)) {
        VLOG_WARN_RL(&error_rl,
                     "%s: creating character device %u:%u failed (%s)",
                     fn, major(dev), minor(dev), strerror(errno));
        return errno;
    }

success:
    *fnp = xstrdup(fn);
    return 0;
}

/* Returns the major device number of the Open vSwitch device, or a negative
 * errno value if it cannot be determined.
 *
 * A successful lookup is remembered; after a failure the lookup is retried
 * on the next call. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}

/* Searches the character-device section of /proc/devices for a driver named
 * 'target'.
 *
 * Returns its major number if found.  Otherwise logs an error and returns a
 * negative errno value: the fopen() failure if the file cannot be opened, or
 * -ENODEV if no matching entry exists. */
static int
get_major(const char *target)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        /* Capture errno before logging, which might change it. */
        int error = errno;

        VLOG_ERR("opening %s failed (%s)", fn, strerror(error));
        return -error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;

        if (!strncmp(line, "Character", 9)
            || line[0] == '\0' || line[0] == '\n') {
            /* Section header or blank line: nothing to do. */
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
            /* Both fields were converted, so 'name' is safe to compare.  A
             * bare truth test would also accept EOF (-1) and a lone number,
             * leaving 'name' uninitialized. */
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else {
            VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
        }
    }

    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
    return -ENODEV;
}

/* Opens the device node for datapath 'minor', creating or repairing the node
 * first via make_openvswitch_device().
 *
 * On success returns 0 with the new descriptor, opened O_RDONLY and
 * O_NONBLOCK, in '*fdp'.  On failure returns a positive errno value; if the
 * failure came from open(), a warning is logged and '*fdp' holds the negative
 * value open() returned. */
static int
open_minor(int minor, int *fdp)
{
    char *path;
    int error;

    error = make_openvswitch_device(minor, &path);
    if (error) {
        return error;
    }

    *fdp = open(path, O_RDONLY | O_NONBLOCK);
    if (*fdp >= 0) {
        error = 0;
    } else {
        error = errno;
        VLOG_WARN("%s: open failed (%s)", path, strerror(error));
    }

    free(path);
    return error;
}

/* Link notifier callback registered by open_dpif(); 'dpif_' is the struct
 * dpif_linux that registered it.
 *
 * A null 'change' sets the change-error flag.  Otherwise, an RTM_NEWLINK or
 * RTM_DELLINK message whose master ifindex is this datapath's local port adds
 * the interface name to the set of changed ports; anything else is
 * ignored. */
static void
dpif_linux_port_changed(const struct rtnetlink_link_change *change,
                        void *dpif_)
{
    struct dpif_linux *dp = dpif_;

    if (!change) {
        dp->change_error = true;
        return;
    }

    if (change->master_ifindex != dp->local_ifindex) {
        return;
    }

    if (change->nlmsg_type == RTM_NEWLINK
        || change->nlmsg_type == RTM_DELLINK) {
        /* Our datapath changed, either adding a new port or deleting an
         * existing one. */
        shash_add_once(&dp->changed_ports, change->ifname, NULL);
    }
}

/* Stores a descriptor for the minor-0 device node in '*dp0_fdp', opening it
 * on first use and reusing the same descriptor afterwards.
 *
 * Returns 0 on success.  If the node cannot be opened, returns the positive
 * errno value from open_minor(), leaves '*dp0_fdp' untouched, and tries
 * again on the next call. */
static int
get_dp0_fd(int *dp0_fdp)
{
    static int cached_fd = -1;

    if (cached_fd < 0) {
        int new_fd;
        int error = open_minor(0, &new_fd);

        if (error) {
            return error;
        }
        cached_fd = new_fd;
    }

    *dp0_fdp = cached_fd;
    return 0;
}

/* Parses the contents of 'buf', which contains a "struct odp_vport" followed
 * by Netlink attributes, into 'vport'.  Returns 0 if successful, otherwise a
 * positive errno value.
 *
 * 'vport' will contain pointers into 'buf', so the caller should not free
 * 'buf' while 'vport' is still in use. */
static int
dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport,
                             const struct ofpbuf *buf)
{
    static const struct nl_policy odp_vport_policy[] = {
        [ODP_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
        [ODP_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
        [ODP_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [ODP_VPORT_ATTR_STATS] = { .type = NL_A_UNSPEC,
                                   .min_len = sizeof(struct rtnl_link_stats64),
                                   .max_len = sizeof(struct rtnl_link_stats64),
                                   .optional = true },
        [ODP_VPORT_ATTR_ADDRESS] = { .type = NL_A_UNSPEC,
                                     .min_len = ETH_ADDR_LEN,
                                     .max_len = ETH_ADDR_LEN,
                                     .optional = true },
        [ODP_VPORT_ATTR_MTU] = { .type = NL_A_U32, .optional = true },
        [ODP_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
        [ODP_VPORT_ATTR_IFINDEX] = { .type = NL_A_U32, .optional = true },
        [ODP_VPORT_ATTR_IFLINK] = { .type = NL_A_U32, .optional = true },
    };

    struct odp_vport *odp_vport;
    struct nlattr *a[ARRAY_SIZE(odp_vport_policy)];

    dpif_linux_vport_init(vport);

    if (!nl_policy_parse(buf, sizeof *odp_vport, odp_vport_policy,
                         a, ARRAY_SIZE(odp_vport_policy))) {
        return EINVAL;
    }
    odp_vport = buf->data;

    vport->dp_idx = odp_vport->dp_idx;
    vport->port_no = nl_attr_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
    vport->type = nl_attr_get_u32(a[ODP_VPORT_ATTR_TYPE]);
    vport->name = nl_attr_get_string(a[ODP_VPORT_ATTR_NAME]);
    if (a[ODP_VPORT_ATTR_STATS]) {
        vport->stats = nl_attr_get(a[ODP_VPORT_ATTR_STATS]);
    }
    if (a[ODP_VPORT_ATTR_ADDRESS]) {
        vport->address = nl_attr_get(a[ODP_VPORT_ATTR_ADDRESS]);
    }
    if (a[ODP_VPORT_ATTR_MTU]) {
        vport->mtu = nl_attr_get_u32(a[ODP_VPORT_ATTR_MTU]);
    }
    if (a[ODP_VPORT_ATTR_OPTIONS]) {
        vport->options = nl_attr_get(a[ODP_VPORT_ATTR_OPTIONS]);
        vport->options_len = nl_attr_get_size(a[ODP_VPORT_ATTR_OPTIONS]);
    }
    if (a[ODP_VPORT_ATTR_IFINDEX]) {
        vport->ifindex = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFINDEX]);
    }
    if (a[ODP_VPORT_ATTR_IFLINK]) {
        vport->iflink = nl_attr_get_u32(a[ODP_VPORT_ATTR_IFLINK]);
    }
    return 0;
}

/* Appends to 'buf' (which must initially be empty) a "struct odp_vport"
 * followed by Netlink attributes corresponding to 'vport'. */
static void
dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport,
                           struct ofpbuf *buf)
{
    struct odp_vport *odp_vport;

    ofpbuf_reserve(buf, sizeof odp_vport);

    if (vport->port_no != UINT32_MAX) {
        nl_msg_put_u32(buf, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
    }

    if (vport->type != ODP_VPORT_TYPE_UNSPEC) {
        nl_msg_put_u32(buf, ODP_VPORT_ATTR_TYPE, vport->type);
    }

    if (vport->name) {
        nl_msg_put_string(buf, ODP_VPORT_ATTR_NAME, vport->name);
    }

    if (vport->stats) {
        nl_msg_put_unspec(buf, ODP_VPORT_ATTR_STATS,
                          vport->stats, sizeof *vport->stats);
    }

    if (vport->address) {
        nl_msg_put_unspec(buf, ODP_VPORT_ATTR_ADDRESS,
                          vport->address, ETH_ADDR_LEN);
    }

    if (vport->mtu) {
        nl_msg_put_u32(buf, ODP_VPORT_ATTR_MTU, vport->mtu);
    }

    if (vport->options) {
        nl_msg_put_nested(buf, ODP_VPORT_ATTR_OPTIONS,
                          vport->options, vport->options_len);
    }

    if (vport->ifindex) {
        nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFINDEX, vport->ifindex);
    }

    if (vport->iflink) {
        nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFLINK, vport->iflink);
    }

    odp_vport = ofpbuf_push_uninit(buf, sizeof *odp_vport);
    odp_vport->dp_idx = vport->dp_idx;
    odp_vport->len = buf->size;
    odp_vport->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
}

/* Resets 'vport' to its "empty" state: every byte is zeroed, then the
 * datapath index and port number are set to UINT32_MAX, the value that
 * dpif_linux_vport_to_ofpbuf() treats as "no port number specified". */
void
dpif_linux_vport_init(struct dpif_linux_vport *vport)
{
    memset(vport, 0, sizeof *vport);

    /* Zero is not the "unset" value for these two members. */
    vport->port_no = UINT32_MAX;
    vport->dp_idx = UINT32_MAX;
}

/* Sends 'request' to the kernel datapath as an ioctl on the descriptor
 * returned by get_dp0_fd().  Returns 0 on success, otherwise a positive errno
 * value.
 *
 * 'reply' and 'bufp' must be both null or both nonnull.  When they are null,
 * any data the kernel sends back is discarded.  When they are nonnull, the
 * kernel's response is expected to be an odp_vport as well: it is decoded
 * into '*reply', and the buffer backing it is handed to the caller in
 * '*bufp'.  The caller must free '*bufp' once the reply is no longer needed
 * ('reply' contains pointers into '*bufp').  On failure '*reply' is zeroed
 * and '*bufp' is set to null. */
int
dpif_linux_vport_transact(const struct dpif_linux_vport *request,
                          struct dpif_linux_vport *reply,
                          struct ofpbuf **bufp)
{
    struct ofpbuf *msg = NULL;
    int retval;
    int dp0_fd;

    assert((reply != NULL) == (bufp != NULL));

    retval = get_dp0_fd(&dp0_fd);
    if (!retval) {
        /* Serialize the request and let the kernel process it in place. */
        msg = ofpbuf_new(1024);
        dpif_linux_vport_to_ofpbuf(request, msg);
        retval = ioctl(dp0_fd, request->cmd, msg->data) ? errno : 0;
    }

    if (!retval && bufp) {
        /* The header's 'len' now gives the size of the reply. */
        msg->size = ((struct odp_vport *) msg->data)->len;
        retval = dpif_linux_vport_from_ofpbuf(reply, msg);
        if (!retval) {
            /* Ownership of 'msg' passes to the caller. */
            *bufp = msg;
            return 0;
        }
    }

    /* Either the caller wants no reply or something failed.  'msg' is still
     * null here if get_dp0_fd() failed. */
    ofpbuf_delete(msg);
    if (retval && bufp) {
        memset(reply, 0, sizeof *reply);
        *bufp = NULL;
    }
    return retval;
}

/* Obtains information about the kernel vport named 'name' and stores it into
 * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
 * longer needed ('reply' will contain pointers into '*bufp').  */
int
dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
                     struct ofpbuf **bufp)
{
    struct dpif_linux_vport request;

    dpif_linux_vport_init(&request);
    request.cmd = ODP_VPORT_GET;
    request.name = name;

    return dpif_linux_vport_transact(&request, reply, bufp);
}

/* Parses the contents of 'buf', which contains a "struct odp_datapath"
 * followed by Netlink attributes, into 'dp'.  Returns 0 if successful,
 * otherwise a positive errno value.
 *
 * 'dp' will contain pointers into 'buf', so the caller should not free 'buf'
 * while 'dp' is still in use. */
static int
dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
{
    static const struct nl_policy odp_datapath_policy[] = {
        [ODP_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [ODP_DP_ATTR_STATS] = { .type = NL_A_UNSPEC,
                                .min_len = sizeof(struct odp_stats),
                                .max_len = sizeof(struct odp_stats),
                                .optional = true },
        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
        [ODP_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true },
    };

    struct odp_datapath *odp_dp;
    struct nlattr *a[ARRAY_SIZE(odp_datapath_policy)];

    dpif_linux_dp_init(dp);

    if (!nl_policy_parse(buf, sizeof *odp_dp, odp_datapath_policy,
                         a, ARRAY_SIZE(odp_datapath_policy))) {
        return EINVAL;
    }
    odp_dp = buf->data;

    dp->dp_idx = odp_dp->dp_idx;
    dp->name = nl_attr_get_string(a[ODP_DP_ATTR_NAME]);
    if (a[ODP_DP_ATTR_STATS]) {
        /* Can't use structure assignment because Netlink doesn't ensure
         * sufficient alignment for 64-bit members. */
        memcpy(&dp->stats, nl_attr_get(a[ODP_DP_ATTR_STATS]),
               sizeof dp->stats);
    }
    if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
        dp->ipv4_frags = nl_attr_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);
    }
    if (a[ODP_DP_ATTR_SAMPLING]) {
        dp->sampling = nl_attr_get(a[ODP_DP_ATTR_SAMPLING]);
    }
    return 0;
}

/* Appends to 'buf' (which must initially be empty) a "struct odp_datapath"
 * followed by Netlink attributes corresponding to 'dp'.
 *
 * Only the name, IPv4 fragment handling and sampling members are serialized,
 * and each only when it is nonnull/nonzero.  The header is pushed in front of
 * the attributes last; its 'len' is the size of the whole message and its
 * 'total_len' is the number of bytes from the start of the message to the end
 * of the buffer's allocation. */
static void
dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
{
    struct odp_datapath *odp_dp;

    /* Reserve headroom for the header structure itself ('sizeof *odp_dp'),
     * not for a pointer to it, so that the header pushed at the end fits in
     * the space set aside for it here. */
    ofpbuf_reserve(buf, sizeof *odp_dp);

    if (dp->name) {
        nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name);
    }

    /* Skip ODP_DP_ATTR_STATS since we never have a reason to serialize it. */

    if (dp->ipv4_frags) {
        nl_msg_put_u32(buf, ODP_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
    }

    if (dp->sampling) {
        nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling);
    }

    /* Prepend the fixed header now that the attribute payload is complete. */
    odp_dp = ofpbuf_push_uninit(buf, sizeof *odp_dp);
    odp_dp->dp_idx = dp->dp_idx;
    odp_dp->len = buf->size;
    odp_dp->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
}

/* Resets 'dp' to its "empty" state: every byte is zeroed, then the datapath
 * index is set to all-ones (UINT32_MAX). */
void
dpif_linux_dp_init(struct dpif_linux_dp *dp)
{
    memset(dp, 0, sizeof *dp);

    /* 'dp_idx' is a uint32_t, so this stores the same value as -1 would. */
    dp->dp_idx = UINT32_MAX;
}

/* Sends 'request' to the kernel datapath as an ioctl on the descriptor
 * returned by get_dp0_fd().  Returns 0 on success, otherwise a positive errno
 * value.
 *
 * 'reply' and 'bufp' must be both null or both nonnull.  When they are null,
 * any data the kernel sends back is discarded.  When they are nonnull, the
 * kernel's response is expected to be an odp_datapath as well: it is decoded
 * into '*reply', and the buffer backing it is handed to the caller in
 * '*bufp'.  The caller must free '*bufp' once the reply is no longer needed
 * ('reply' contains pointers into '*bufp').  On failure '*reply' is zeroed
 * and '*bufp' is set to null. */
int
dpif_linux_dp_transact(const struct dpif_linux_dp *request,
                       struct dpif_linux_dp *reply, struct ofpbuf **bufp)
{
    struct ofpbuf *msg = NULL;
    int retval;
    int dp0_fd;

    assert((reply != NULL) == (bufp != NULL));

    retval = get_dp0_fd(&dp0_fd);
    if (!retval) {
        /* Serialize the request and let the kernel process it in place. */
        msg = ofpbuf_new(1024);
        dpif_linux_dp_to_ofpbuf(request, msg);
        retval = ioctl(dp0_fd, request->cmd, msg->data) ? errno : 0;
    }

    if (!retval && bufp) {
        /* The header's 'len' now gives the size of the reply. */
        msg->size = ((struct odp_datapath *) msg->data)->len;
        retval = dpif_linux_dp_from_ofpbuf(reply, msg);
        if (!retval) {
            /* Ownership of 'msg' passes to the caller. */
            *bufp = msg;
            return 0;
        }
    }

    /* Either the caller wants no reply or something failed.  'msg' is still
     * null here if get_dp0_fd() failed. */
    ofpbuf_delete(msg);
    if (retval && bufp) {
        memset(reply, 0, sizeof *reply);
        *bufp = NULL;
    }
    return retval;
}

/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
 * The caller must free '*bufp' when the reply is no longer needed ('reply'
 * will contain pointers into '*bufp').  */
int
dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
                  struct ofpbuf **bufp)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_dp request;

    dpif_linux_dp_init(&request);
    request.cmd = ODP_DP_GET;
    request.dp_idx = dpif->minor;

    return dpif_linux_dp_transact(&request, reply, bufp);
}

/* Parses the contents of 'buf', which contains a "struct odp_flow" followed by
 * Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
 * positive errno value (EINVAL if the attributes do not satisfy the policy
 * below).
 *
 * 'flow' is reset with dpif_linux_flow_init() first, so attributes that are
 * absent from 'buf' are left null/zero.
 *
 * 'flow' will contain pointers into 'buf' (key, actions, stats, TCP flags,
 * used time and state), so the caller should not free 'buf' while 'flow' is
 * still in use. */
static int
dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
                            const struct ofpbuf *buf)
{
    static const struct nl_policy odp_flow_policy[] = {
        [ODP_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
        [ODP_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
        [ODP_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC,
                                  .min_len = sizeof(struct odp_flow_stats),
                                  .max_len = sizeof(struct odp_flow_stats),
                                  .optional = true },
        [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
        [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
        /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
        [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
    };

    struct odp_flow *odp_flow;
    struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];

    dpif_linux_flow_init(flow);

    if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy,
                         a, ARRAY_SIZE(odp_flow_policy))) {
        return EINVAL;
    }
    odp_flow = buf->data;

    flow->nlmsg_flags = odp_flow->nlmsg_flags;
    flow->dp_idx = odp_flow->dp_idx;

    /* The key is the only mandatory attribute. */
    flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
    flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);

    if (a[ODP_FLOW_ATTR_ACTIONS]) {
        flow->actions = nl_attr_get(a[ODP_FLOW_ATTR_ACTIONS]);
        flow->actions_len = nl_attr_get_size(a[ODP_FLOW_ATTR_ACTIONS]);
    }
    if (a[ODP_FLOW_ATTR_STATS]) {
        flow->stats = nl_attr_get(a[ODP_FLOW_ATTR_STATS]);
    }
    if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
        flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
    }
    if (a[ODP_FLOW_ATTR_USED]) {
        /* The policy accepts this attribute, and parsing a reply is the only
         * place 'flow->used' can be filled in (it is never sent to the
         * kernel).  Without this, dpif_linux_flow_get_stats() would always
         * report a used time of 0.  Stored as a pointer because the payload
         * may be unaligned; readers use get_unaligned_u64(). */
        flow->used = nl_attr_get(a[ODP_FLOW_ATTR_USED]);
    }
    if (a[ODP_FLOW_ATTR_STATE]) {
        flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
    }
    return 0;
}

/* Appends to 'buf' (which must initially be empty) a "struct odp_flow"
 * followed by Netlink attributes corresponding to 'flow'.
 *
 * The key and actions are serialized only when their lengths are nonzero, the
 * clear flag only when set, and the state only when nonnull.  'flow' must not
 * carry stats, TCP flags or a used time (asserted below).  The header is
 * pushed in front of the attributes last; its 'len' is the size of the whole
 * message and its 'total_len' is the number of bytes from the start of the
 * message to the end of the buffer's allocation. */
static void
dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
                          struct ofpbuf *buf)
{
    struct odp_flow *odp_flow;

    /* Reserve headroom for the header structure itself ('sizeof *odp_flow'),
     * not for a pointer to it, so that the header pushed at the end fits in
     * the space set aside for it here. */
    ofpbuf_reserve(buf, sizeof *odp_flow);

    if (flow->key_len) {
        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
    }

    if (flow->actions_len) {
        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS,
                          flow->actions, flow->actions_len);
    }

    /* We never need to send these to the kernel. */
    assert(!flow->stats);
    assert(!flow->tcp_flags);
    assert(!flow->used);

    if (flow->clear) {
        nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
    }

    if (flow->state) {
        /* 'state' may point at unaligned data, so read it accordingly. */
        nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
                       get_unaligned_u64(flow->state));
    }

    /* Prepend the fixed header now that the attribute payload is complete. */
    odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
    odp_flow->nlmsg_flags = flow->nlmsg_flags;
    odp_flow->dp_idx = flow->dp_idx;
    odp_flow->len = buf->size;
    odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
}

/* Clears 'flow' to "empty" values by zeroing every byte of the structure.
 * Unlike the vport and datapath initializers, no member is given a nonzero
 * "unset" value afterward. */
void
dpif_linux_flow_init(struct dpif_linux_flow *flow)
{
    memset(flow, 0, sizeof *flow);
}

/* Sends 'request' to the kernel datapath as an ioctl on the descriptor
 * returned by get_dp0_fd().  Returns 0 on success, otherwise a positive errno
 * value.
 *
 * 'reply' and 'bufp' must be both null or both nonnull.  When they are null,
 * any data the kernel sends back is discarded.  When they are nonnull, the
 * kernel's response is expected to be an odp_flow as well: it is decoded into
 * '*reply', and the buffer backing it is handed to the caller in '*bufp'.
 * The caller must free '*bufp' once the reply is no longer needed ('reply'
 * contains pointers into '*bufp').  On failure '*reply' is zeroed and '*bufp'
 * is set to null. */
int
dpif_linux_flow_transact(const struct dpif_linux_flow *request,
                         struct dpif_linux_flow *reply, struct ofpbuf **bufp)
{
    struct ofpbuf *msg = NULL;
    int retval;
    int dp0_fd;

    assert((reply != NULL) == (bufp != NULL));

    retval = get_dp0_fd(&dp0_fd);
    if (!retval) {
        /* Serialize the request and let the kernel process it in place. */
        msg = ofpbuf_new(1024);
        dpif_linux_flow_to_ofpbuf(request, msg);
        retval = ioctl(dp0_fd, request->cmd, msg->data) ? errno : 0;
    }

    if (!retval && bufp) {
        /* The header's 'len' now gives the size of the reply. */
        msg->size = ((struct odp_flow *) msg->data)->len;
        retval = dpif_linux_flow_from_ofpbuf(reply, msg);
        if (!retval) {
            /* Ownership of 'msg' passes to the caller. */
            *bufp = msg;
            return 0;
        }
    }

    /* Either the caller wants no reply or something failed.  'msg' is still
     * null here if get_dp0_fd() failed. */
    ofpbuf_delete(msg);
    if (retval && bufp) {
        memset(reply, 0, sizeof *reply);
        *bufp = NULL;
    }
    return retval;
}

/* Fills in '*stats' from 'flow'.
 *
 * Each of the stats, used time and TCP flags members of 'flow' is a pointer
 * that may be null; a null pointer yields 0 for the corresponding output
 * member(s).  The 64-bit values are read with get_unaligned_u64(). */
static void
dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
                          struct dpif_flow_stats *stats)
{
    uint64_t packets = 0;
    uint64_t bytes = 0;
    uint64_t last_used = 0;

    if (flow->stats) {
        packets = get_unaligned_u64(&flow->stats->n_packets);
        bytes = get_unaligned_u64(&flow->stats->n_bytes);
    }
    if (flow->used) {
        last_used = get_unaligned_u64(flow->used);
    }

    stats->n_packets = packets;
    stats->n_bytes = bytes;
    stats->used = last_used;

    if (flow->tcp_flags) {
        stats->tcp_flags = *flow->tcp_flags;
    } else {
        stats->tcp_flags = 0;
    }
}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								/*
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								 *
 								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
 								 *
 								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
 								 */
 								#include <config.h>
-												datapath: Drop datapath index and port number from Ethtool output.

I introduced this a long time ago as an efficient way for userspace to find
out whether and where an internal device was attached, but I've always
considered it an ugly kluge.  Now that ODP_VPORT_QUERY can fetch a vport's
info regardless of datapath, it is no longer necessary.  This commit
stops using Ethtool for this purpose and drops the feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-29 14:20:16 -08:00
 								#include "dpif-linux.h"
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
 								#include <assert.h>
 								#include <ctype.h>
 								#include <errno.h>
 								#include <fcntl.h>
 								#include <inttypes.h>
 								#include <net/if.h>
-												datapath: Make datapath-protocol.h portable to non-Linux systems.

datapath-protocol.h is not a very clean interface.  I originally intended
it to be solely a Linux-kernel specific interface.  Over time it became
a general-purpose interface to dpifs.  This is not a good situation,
because clearly the header is still Linux-specific.

In the long run, the correct solution is to separate the generic and
Linux-specific bits.  This is not that patch.  Instead, this patch modifies
datapath-protocol.h enough that it can be used on non-Linux hosts.  In
particular I tested that it works OK with FreeBSD 8.0.

											
										
										
											2010-05-26 15:32:34 -07:00
+								#include <linux/types.h>
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include <linux/ethtool.h>
-												dpif: Abstract translation from OpenFlow queue ID into ODP priority value.

When the QoS code was integrated, I didn't yet know how to abstract the
translation from a queue ID in an OpenFlow OFPAT_ENQUEUE action into a
priority value for an ODP ODPAT_SET_PRIORITY action.  This commit is a
first attempt that works OK for Linux, so far.  It's possible that in fact
this translation needs the 'netdev' as an argument too, but it's not needed
yet.

											
										
										
											2010-07-20 11:23:21 -07:00
+								#include <linux/pkt_sched.h>
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								#include <linux/rtnetlink.h>
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include <linux/sockios.h>
 								#include <stdlib.h>
 								#include <sys/ioctl.h>
-												dpif: Include stat.h header

											
										
										
											2010-05-20 13:26:48 -07:00
+								#include <sys/stat.h>
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include <unistd.h>
 								#include "dpif-provider.h"
-												dpif-linux: Clean up vports that are no longer in config.

If the config changes while ovs-vswitchd is not running it is possible
that there could be some vports which are no longer needed but won't
be destroyed when closed because they aren't open.  This deletes
unneeded vports at the same time that we clean up unneeded datapaths.

											
										
										
											2010-04-10 01:19:29 -04:00
+								#include "netdev.h"
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								#include "netdev-vport.h"
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								#include "netlink.h"
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								#include "odp-util.h"
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include "ofpbuf.h"
-												vswitch: Use "ipsec_gre" vport instead of "gre" with "other_config"

Previously, a GRE-over-IPsec tunnel was created as an interface with a
"type" of "gre" and the "other_config" column with "ipsec_cert" or
"ipsec_psk" set.  This could lead to a potential security problem if a user
intended to create a GRE-over-IPsec tunnel, but misconfigured the
"ipsec_*" config and created an unencrypted GRE tunnel.

This commit defines an "ipsec_gre" tunnel type, which should prevent
users from inadvertently establishing insecure tunnels.

											
										
										
											2010-12-01 17:23:33 -08:00
+								#include "openvswitch/tunnel.h"
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								#include "packets.h"
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include "poll-loop.h"
-												rtnetlink: Move into separate source and header file.

Now that rtnetlink isn't named similarly to netdev_linux, it might as well
have its own source and header files to avoid confusing everyone.

											
										
										
											2009-07-28 13:05:20 -07:00
+								#include "rtnetlink.h"
-												rtnetlink: Remove LINK specific messages from rtnetlink

Abstracted rtnetlink so that it may be used for messages other than
RTM LINK messages.  Created a new rtnetlink-link module which
specifically deals with these kinds of messages and follows the old
rtnetlink API.

											
										
										
											2010-12-21 13:44:37 -08:00
+								#include "rtnetlink-link.h"
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								#include "shash.h"
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								#include "svec.h"
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								#include "unaligned.h"
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								#include "util.h"
 								#include "vlog.h"
-												vlog: Introduce VLOG_DEFINE_THIS_MODULE for declaring vlog module in use.

Adding a macro to define the vlog module in use adds a level of
indirection, which makes it easier to change how the vlog module must be
defined.  A followup commit needs to do that, so getting these widespread
changes out of the way first should make that commit easier to review.

											
										
										
											2010-07-16 11:02:49 -07:00
-												vlog: Make client supply semicolon for VLOG_DEFINE_THIS_MODULE.

It's kind of odd for VLOG_DEFINE_THIS_MODULE to supply its own semicolon,
so this commit switches to the more common form.

											
										
										
											2010-10-19 14:47:01 -07:00
+								VLOG_DEFINE_THIS_MODULE(dpif_linux);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								struct dpif_linux_dp {
 								    /* Unpacked form of a datapath request or reply.  Requests in this file
 								     * are prepared with dpif_linux_dp_init(), given a 'cmd' plus any
 								     * relevant members, and handed to dpif_linux_dp_transact(), which
 								     * fills in a second instance as the reply. */
 								    /* ioctl command argument, e.g. ODP_DP_NEW or ODP_DP_DUMP. */
 								    int cmd;
 								    /* struct odp_datapath header. */
 								    uint32_t dp_idx;
 								    /* Attributes. */
 								    const char *name;                  /* ODP_DP_ATTR_NAME. */
 								    struct odp_stats stats;            /* ODP_DP_ATTR_STATS. */
 								    enum odp_frag_handling ipv4_frags; /* ODP_DP_ATTR_IPV4_FRAGS. */
 								    const uint32_t *sampling;          /* ODP_DP_ATTR_SAMPLING. */
 								    /* NOTE(review): 'name' and 'sampling' are pointers; in a reply they
 								     * presumably refer to storage owned by the reply buffer -- confirm
 								     * before using them after that buffer is deleted. */
 								};
 								/* Prepares a 'struct dpif_linux_dp' for use; callers in this file invoke
 								 * it on each request before assigning individual members. */
 								static void dpif_linux_dp_init(struct dpif_linux_dp *);
 								/* Carries out 'request', filling in '*reply' and '*bufp'.  Callers treat a
 								 * nonzero return as an error code (ENODEV is one value they test for) and,
 								 * on success, release '*bufp' with ofpbuf_delete() once done with the
 								 * reply. */
 								static int dpif_linux_dp_transact(const struct dpif_linux_dp *request,
 								                                  struct dpif_linux_dp *reply,
 								                                  struct ofpbuf **bufp);
 								/* NOTE(review): presumably retrieves the datapath behind the given dpif
 								 * using the same 'reply'/'bufp' convention as dpif_linux_dp_transact() --
 								 * the definition is not visible here, so confirm. */
 								static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
 								                             struct ofpbuf **bufp);
 								struct dpif_linux_flow {
 								    /* Unpacked form of a flow request or reply, the flow counterpart of
 								     * 'struct dpif_linux_dp'.  NOTE(review): presumably used with
 								     * dpif_linux_flow_init() and dpif_linux_flow_transact() in the same
 								     * request/reply pattern -- confirm against their definitions. */
 								    /* ioctl command argument. */
 								    int cmd;
 								    /* struct odp_flow header. */
 								    unsigned int nlmsg_flags;
 								    uint32_t dp_idx;
 								    /* Attributes.
 								     *
 								     * The 'stats', 'used', and 'state' members point to 64-bit data that might
 								     * only be aligned on 32-bit boundaries, so get_unaligned_u64() should be
 								     * used to access their values. */
 								    const struct nlattr *key;           /* ODP_FLOW_ATTR_KEY. */
 								    size_t key_len;     /* NOTE(review): presumably size of 'key' in bytes. */
 								    const struct nlattr *actions;       /* ODP_FLOW_ATTR_ACTIONS. */
 								    size_t actions_len; /* NOTE(review): presumably size of 'actions'. */
 								    const struct odp_flow_stats *stats; /* ODP_FLOW_ATTR_STATS. */
 								    const uint8_t *tcp_flags;           /* ODP_FLOW_ATTR_TCP_FLAGS. */
 								    const uint64_t *used;               /* ODP_FLOW_ATTR_USED. */
 								    bool clear;                         /* ODP_FLOW_ATTR_CLEAR. */
 								    const uint64_t *state;              /* ODP_FLOW_ATTR_STATE. */
 								};
 								/* Flow counterparts of the dpif_linux_dp_*() helpers declared earlier.
 								 * NOTE(review): neither their definitions nor any call sites are visible
 								 * in this part of the file; the notes below are assumptions to confirm. */
 								/* Presumably prepares a 'struct dpif_linux_flow' before its members are
 								 * filled in. */
 								static void dpif_linux_flow_init(struct dpif_linux_flow *);
 								/* Presumably carries out 'request' and fills in '*reply' and '*bufp', with
 								 * the same return and buffer conventions as dpif_linux_dp_transact(). */
 								static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
 								                                    struct dpif_linux_flow *reply,
 								                                    struct ofpbuf **bufp);
 								/* Presumably fills the 'struct dpif_flow_stats' argument from the given
 								 * flow. */
 								static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
 								                                      struct dpif_flow_stats *);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								/* Datapath interface for the openvswitch Linux kernel module. */
 								struct dpif_linux {
 								    struct dpif dpif;
 								    int fd;
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								    /* Used by dpif_linux_get_all_names(). */
 								    char *local_ifname;
 								    int minor;
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    /* Change notification. */
 								    int local_ifindex;          /* Ifindex of local port. */
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								    struct shash changed_ports;  /* Ports that have changed. */
-												netdev-linux: Rename "linux_netdev_*" to "rtnetlink_*".

It was getting to be too confusing to have both netdev_linux_* functions
and linux_netdev_* functions.  Rename the latter to make the distinction
more obvious.  "rtnetlink" seems to be a fairly good name because that's
what the kernel calls it, so the name will be familiar at least to people
who know about rtnetlink.

											
										
										
											2009-07-28 12:36:32 -07:00
+								    struct rtnetlink_notifier port_notifier;
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								    bool change_error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								};
 								static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
 								static int do_ioctl(const struct dpif *, int cmd, const void *arg);
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								static int open_dpif(const struct dpif_linux_vport *local_vport,
 								                     struct dpif **);
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								static int get_openvswitch_major(void);
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								static int open_minor(int minor, int *fdp);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int make_openvswitch_device(int minor, char **fnp);
-												rtnetlink: Remove LINK specific messages from rtnetlink

Abstracted rtnetlink so that it may be used for messages other than
RTM LINK messages.  Created a new rtnetlink-link module which
specifically deals with these kinds of messages and follows the old
rtnetlink API.

											
										
										
											2010-12-21 13:44:37 -08:00
+								static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								                                    void *dpif);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
 								/* Returns the 'struct dpif_linux' that embeds 'dpif' as its 'dpif' member.
 								 * dpif_assert_class() is first applied to 'dpif' with dpif_linux_class, so
 								 * the conversion is only attempted on a dpif of this provider's class. */
 								static struct dpif_linux *
 								dpif_linux_cast(const struct dpif *dpif)
 								{
 								    struct dpif_linux *linux_dpif;
 								    dpif_assert_class(dpif, &dpif_linux_class);
 								    linux_dpif = CONTAINER_OF(dpif, struct dpif_linux, dpif);
 								    return linux_dpif;
 								}
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								static int
 								dpif_linux_enumerate(struct svec *all_dps)
 								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    uint32_t dp_idx;
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    int major;
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    /* Check that the Open vSwitch module is loaded. */
 								    major = get_openvswitch_major();
 								    if (major < 0) {
 								        return -major;
 								    }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    dp_idx = 0;
 								    for (;;) {
 								        struct dpif_linux_dp request, reply;
 								        struct ofpbuf *buf;
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								        char devname[16];
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        int error;
 								        dpif_linux_dp_init(&request);
 								        request.dp_idx = dp_idx;
 								        request.cmd = ODP_DP_DUMP;
 								        error = dpif_linux_dp_transact(&request, &reply, &buf);
 								        if (error) {
 								            return error == ENODEV ? 0 : error;
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								        }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        ofpbuf_delete(buf);
 								        sprintf(devname, "dp%d", reply.dp_idx);
 								        svec_add(all_dps, devname);
 								        dp_idx = reply.dp_idx + 1;
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								    }
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												dpif: Make dpif_class 'open' function take class instead of type name.

This makes it easier for dpif_provider implementations to share code but
distinguish the class actually in use, because comparing a pointer is
easier than comparing a string.

											
										
										
											2010-11-18 10:06:41 -08:00
+								dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
 								                bool create, struct dpif **dpifp)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_vport request, reply;
 								    struct ofpbuf *buf;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    int minor;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    int error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
-												Cleanup isdigit() warnings.

NetBSD's gcc complains if isdigit()'s argument is an unadorned char.  This
provides an appropriate cast.

											
										
										
											2009-08-25 14:11:44 -07:00
+								    minor = !strncmp(name, "dp", 2)
 								            && isdigit((unsigned char)name[2]) ? atoi(name + 2) : -1;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    if (create) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        struct dpif_linux_dp request, reply;
 								        struct ofpbuf *buf;
 								        int error;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        dpif_linux_dp_init(&request);
 								        request.cmd = ODP_DP_NEW;
 								        request.dp_idx = minor;
 								        request.name = name;
 								        error = dpif_linux_dp_transact(&request, &reply, &buf);
 								        if (error) {
 								            return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								        }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        minor = reply.dp_idx;
 								        ofpbuf_delete(buf);
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    }
 								    dpif_linux_vport_init(&request);
 								    request.cmd = ODP_VPORT_GET;
 								    request.port_no = ODPP_LOCAL;
 								    if (minor >= 0) {
 								        request.dp_idx = minor;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    } else {
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								        request.name = name;
 								    }
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    error = dpif_linux_vport_transact(&request, &reply, &buf);
 								    if (error) {
 								        return error;
 								    } else if (reply.port_no != ODPP_LOCAL) {
 								        /* This is an Open vSwitch device but not the local port.  We
 								         * intentionally support only using the name of the local port as the
 								         * name of a datapath; otherwise, it would be too difficult to
 								         * enumerate all the names of a datapath. */
 								        error = EOPNOTSUPP;
 								    } else {
 								        error = open_dpif(&reply, dpifp);
 								    }
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    ofpbuf_delete(buf);
 								    return error;
 								}
 								static int
 								open_dpif(const struct dpif_linux_vport *local_vport, struct dpif **dpifp)
 								{
 								    int dp_idx = local_vport->dp_idx;
 								    struct dpif_linux *dpif;
 								    char *name;
 								    int error;
 								    int fd;
 								    error = open_minor(dp_idx, &fd);
 								    if (error) {
 								        goto error;
 								    }
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    dpif = xmalloc(sizeof *dpif);
 								    error = rtnetlink_link_notifier_register(&dpif->port_notifier,
 								                                             dpif_linux_port_changed, dpif);
 								    if (error) {
 								        goto error_free;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
 								    name = xasprintf("dp%d", dp_idx);
 								    dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx);
 								    free(name);
 								    dpif->fd = fd;
 								    dpif->local_ifname = xstrdup(local_vport->name);
 								    dpif->local_ifindex = local_vport->ifindex;
 								    dpif->minor = dp_idx;
 								    shash_init(&dpif->changed_ports);
 								    dpif->change_error = false;
 								    *dpifp = &dpif->dpif;
 								    return 0;
 								error_free:
 								    free(dpif);
 								    close(fd);
 								error:
 								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static void
 								dpif_linux_close(struct dpif *dpif_)
 								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												rtnetlink: Remove LINK specific messages from rtnetlink

Abstracted rtnetlink so that it may be used for messages other than
RTM LINK messages.  Created a new rtnetlink-link module which
specifically deals with these kinds of messages and follows the old
rtnetlink API.

											
										
										
											2010-12-21 13:44:37 -08:00
+								    rtnetlink_link_notifier_unregister(&dpif->port_notifier);
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								    shash_destroy(&dpif->changed_ports);
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								    free(dpif->local_ifname);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    close(dpif->fd);
 								    free(dpif);
 								}
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								static int
 								dpif_linux_get_all_names(const struct dpif *dpif_, struct svec *all_names)
 								{
 								    /* Appends to 'all_names' the two names under which this datapath is
 								     * known in this file: the "dp<minor>" form and the name of its local
 								     * port's network device.  Always returns 0. */
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    /* The freshly formatted string is handed over without copying
 								     * (NOTE(review): presumably 'all_names' then owns it -- confirm against
 								     * svec_add_nocopy()). */
 								    svec_add_nocopy(all_names, xasprintf("dp%d", dpif->minor));
 								    /* NOTE(review): svec_add() presumably stores its own copy, so
 								     * dpif->local_ifname stays owned by 'dpif' -- confirm. */
 								    svec_add(all_names, dpif->local_ifname);
 								    return 0;
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								dpif_linux_destroy(struct dpif *dpif_)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct dpif_linux_dp dp;
 								    dpif_linux_dp_init(&dp);
 								    dp.cmd = ODP_DP_DEL;
 								    dp.dp_idx = dpif->minor;
 								    return dpif_linux_dp_transact(&dp, NULL, NULL);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
 								dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
 								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_dp dp;
 								    struct ofpbuf *buf;
 								    int error;
 								    error = dpif_linux_dp_get(dpif_, &dp, &buf);
 								    if (!error) {
 								        *stats = dp.stats;
 								        ofpbuf_delete(buf);
 								    }
 								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
 								dpif_linux_get_drop_frags(const struct dpif *dpif_, bool *drop_fragsp)
 								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_dp dp;
 								    struct ofpbuf *buf;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    error = dpif_linux_dp_get(dpif_, &dp, &buf);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    if (!error) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        *drop_fragsp = dp.ipv4_frags == ODP_DP_FRAG_DROP;
 								        ofpbuf_delete(buf);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
 								    return error;
 								}
 								static int
 								dpif_linux_set_drop_frags(struct dpif *dpif_, bool drop_frags)
 								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct dpif_linux_dp dp;
 								    dpif_linux_dp_init(&dp);
 								    dp.cmd = ODP_DP_SET;
 								    dp.dp_idx = dpif->minor;
 								    dp.ipv4_frags = drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO;
 								    return dpif_linux_dp_transact(&dp, NULL, NULL);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								                    uint16_t *port_nop)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								    const char *name = netdev_get_name(netdev);
 								    const char *type = netdev_get_type(netdev);
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_vport request, reply;
 								    const struct ofpbuf *options;
 								    struct ofpbuf *buf;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    int error;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    dpif_linux_vport_init(&request);
 								    request.cmd = ODP_VPORT_NEW;
 								    request.dp_idx = dpif->minor;
 								    request.type = netdev_vport_get_vport_type(netdev);
 								    if (request.type == ODP_VPORT_TYPE_UNSPEC) {
-												datapath: Change vport type from string to integer enumeration.

I plan to make the vport type part of the standard header stuck on each
Netlink message related to a vport.  As such, it is more convenient to use
an integer than a string.  In addition, by being fundamentally different
from strings, using an integer may reduce the confusion we've had in the
past over the differences in userspace and kernel names for network device
and vport types.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 20:01:30 -08:00
+								        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
 								                     "unsupported type `%s'",
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								                     dpif_name(dpif_), name, type);
-												datapath: Change vport type from string to integer enumeration.

I plan to make the vport type part of the standard header stuck on each
Netlink message related to a vport.  As such, it is more convenient to use
an integer than a string.  In addition, by being fundamentally different
from strings, using an integer may reduce the confusion we've had in the
past over the differences in userspace and kernel names for network device
and vport types.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-23 20:01:30 -08:00
+								        return EINVAL;
 								    }
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    request.name = name;
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    options = netdev_vport_get_options(netdev);
 								    if (options && options->size) {
 								        request.options = options->data;
 								        request.options_len = options->size;
 								    }
 								    error = dpif_linux_vport_transact(&request, &reply, &buf);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    if (!error) {
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								        *port_nop = reply.port_no;
 								        ofpbuf_delete(buf);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    return error;
 								}
 								static int
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								dpif_linux_port_del(struct dpif *dpif_, uint16_t port_no)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct dpif_linux_vport vport;
 								    dpif_linux_vport_init(&vport);
 								    vport.cmd = ODP_VPORT_DEL;
 								    vport.dp_idx = dpif->minor;
 								    vport.port_no = port_no;
 								    return dpif_linux_vport_transact(&vport, NULL, NULL);
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								}
-												dpif-linux: Clean up vports that are no longer in config.

If the config changes while ovs-vswitchd is not running it is possible
that there could be some vports which are no longer needed but won't
be destroyed when closed because they aren't open.  This deletes
unneeded vports at the same time that we clean up unneeded datapaths.

											
										
										
											2010-04-10 01:19:29 -04:00
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								static int
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no,
 								                        const char *port_name, struct dpif_port *dpif_port)
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_vport request;
 								    struct dpif_linux_vport reply;
 								    struct ofpbuf *buf;
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    int error;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    dpif_linux_vport_init(&request);
 								    request.cmd = ODP_VPORT_GET;
 								    request.dp_idx = dpif_linux_cast(dpif)->minor;
 								    request.port_no = port_no;
 								    request.name = port_name;
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    error = dpif_linux_vport_transact(&request, &reply, &buf);
 								    if (!error) {
 								        dpif_port->name = xstrdup(reply.name);
 								        dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply));
 								        dpif_port->port_no = reply.port_no;
 								        ofpbuf_delete(buf);
-												dpif-linux: Clean up vports that are no longer in config.

If the config changes while ovs-vswitchd is not running it is possible
that there could be some vports which are no longer needed but won't
be destroyed when closed because they aren't open.  This deletes
unneeded vports at the same time that we clean up unneeded datapaths.

											
										
										
											2010-04-10 01:19:29 -04:00
+								    }
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								dpif_linux_port_query_by_number(const struct dpif *dpif, uint16_t port_no,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                                struct dpif_port *dpif_port)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    return dpif_linux_port_query__(dpif, port_no, NULL, dpif_port);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
 								                              struct dpif_port *dpif_port)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								    return dpif_linux_port_query__(dpif, 0, devname, dpif_port);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
-												datapath: Drop port information from odp_stats.

As with n_flows, n_ports was used regularly by userspace to determine how
much memory to allocate when listing ports, but it is no longer needed for
that.  max_ports, on the other hand, is necessary but it is also a fixed
value for the kernel datapath right now and if we expand it we can also
come up with a way to report the expanded value.

The remaining members of odp_stats are actually real statistics that I
intend to keep.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 09:24:59 -08:00
+								static int
 								dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED)
 								{
 								    /* If the datapath increases its range of supported ports, then it should
 								     * start reporting that. */
 								    return 1024;
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
 								dpif_linux_flow_flush(struct dpif *dpif_)
 								{
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    return ioctl(dpif->fd, ODP_FLOW_FLUSH, dpif->minor) ? errno : 0;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
/* Iteration state for a port dump, allocated by dpif_linux_port_dump_start()
 * and advanced by dpif_linux_port_dump_next(). */
struct dpif_linux_port_state {
    struct ofpbuf *buf;         /* Reply buffer from the most recent
                                 * successful dump step; deleted at the start
                                 * of the following step. */
    uint32_t next;              /* Port number to send in the next
                                 * ODP_VPORT_DUMP request (last reported port
                                 * number plus one). */
};
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								dpif_linux_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    *statep = xzalloc(sizeof(struct dpif_linux_port_state));
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    return 0;
 								}
 								static int
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								dpif_linux_port_dump_next(const struct dpif *dpif, void *state_,
-												dpif: Eliminate "struct odp_port" from client-visible interface.

Following this commit, "struct odp_port" is only used in Linux-specific
parts of OVS userspace code.  This allows the actual Linux datapath
interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-23 18:48:02 -08:00
+								                          struct dpif_port *dpif_port)
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_port_state *state = state_;
 								    struct dpif_linux_vport request, reply;
 								    struct ofpbuf *buf;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    int error;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    ofpbuf_delete(state->buf);
 								    state->buf = NULL;
 								    dpif_linux_vport_init(&request);
 								    request.cmd = ODP_VPORT_DUMP;
 								    request.dp_idx = dpif_linux_cast(dpif)->minor;
 								    request.port_no = state->next;
 								    error = dpif_linux_vport_transact(&request, &reply, &buf);
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								    if (error) {
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								        return error == ENODEV ? EOF : error;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    } else {
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								        dpif_port->name = (char *) reply.name;
 								        dpif_port->type = (char *) netdev_vport_get_netdev_type(&reply);
 								        dpif_port->port_no = reply.port_no;
 								        state->buf = buf;
 								        state->next = reply.port_no + 1;
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								        return 0;
-												datapath: Make adding and attaching a vport a single step.

For some time now, Open vSwitch datapaths have internally made a
distinction between adding a vport and attaching it to a datapath.  Adding
a vport just means to create it, as an entity detached from any datapath.
Attaching it gives it a port number and a datapath.  Similarly, a vport
could be detached and deleted separately.

After some study, I think I understand why this distinction exists.  It is
because ovs-vswitchd tries to open all the datapath ports before it tries
to create them.  However, changing it to create them before it tries to
open them is not difficult, so this commit does this.

The bulk of this commit, however, changes the datapath interface to one
that always creates a vport and attaches it to a datapath in a single step,
and similarly detaches a vport and deletes it in a single step.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-03 14:41:38 -08:00
+								    }
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								}
 								static int
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_port_state *state = state_;
 								    ofpbuf_delete(state->buf);
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    free(state);
 								    return 0;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								static int
 								dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep)
 								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								    if (dpif->change_error) {
 								        dpif->change_error = false;
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								        shash_clear(&dpif->changed_ports);
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								        return ENOBUFS;
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								    } else if (!shash_is_empty(&dpif->changed_ports)) {
 								        struct shash_node *node = shash_first(&dpif->changed_ports);
-												shash: New function shash_steal().

											
										
										
											2010-09-23 09:42:30 -07:00
+								        *devnamep = shash_steal(&dpif->changed_ports, node);
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								        return 0;
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    } else {
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								        return EAGAIN;
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    }
 								}
 								static void
 								dpif_linux_port_poll_wait(const struct dpif *dpif_)
 								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								    if (!shash_is_empty(&dpif->changed_ports) || dpif->change_error) {
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								        poll_immediate_wake();
 								    } else {
-												rtnetlink: Remove LINK specific messages from rtnetlink

Abstracted rtnetlink so that it may be used for messages other than
RTM LINK messages.  Created a new rtnetlink-link module which
specifically deals with these kinds of messages and follows the old
rtnetlink API.

											
										
										
											2010-12-21 13:44:37 -08:00
+								        rtnetlink_link_notifier_wait();
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    }
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												datapath: Eliminate 'flags' member from odp_flow.

Nothing was productively using the 'flags' member of odp_flow, so this
commit removes it.

ODPFF_ZERO_TCP_FLAGS isn't used at all (as of the previous commit).

ODPFF_EOF has been replaced by a special case of the 'key_len' member.
This will go away, too, once AF_NETLINK starts being used.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-17 14:43:30 -08:00
+								dpif_linux_flow_get(const struct dpif *dpif_,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                    const struct nlattr *key, size_t key_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                    struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_flow request, reply;
 								    struct ofpbuf *buf;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    dpif_linux_flow_init(&request);
 								    request.cmd = ODP_FLOW_GET;
 								    request.dp_idx = dpif->minor;
 								    request.key = key;
 								    request.key_len = key_len;
 								    error = dpif_linux_flow_transact(&request, &reply, &buf);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    if (!error) {
 								        if (stats) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								            dpif_linux_flow_get_stats(&reply, stats);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        if (actionsp) {
 								            buf->data = (void *) reply.actions;
 								            buf->size = reply.actions_len;
 								            *actionsp = buf;
 								        } else {
 								            ofpbuf_delete(buf);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        }
 								    }
 								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                    const struct nlattr *key, size_t key_len,
 								                    const struct nlattr *actions, size_t actions_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                    struct dpif_flow_stats *stats)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_flow request, reply;
 								    struct ofpbuf *buf;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    dpif_linux_flow_init(&request);
 								    request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET;
 								    request.dp_idx = dpif->minor;
 								    request.key = key;
 								    request.key_len = key_len;
 								    request.actions = actions;
 								    request.actions_len = actions_len;
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								    if (flags & DPIF_FP_ZERO_STATS) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        request.clear = true;
-												dpif: Eliminate ODPPF_* constants from client-visible interface.

Following this commit, the ODPPF_* constants are only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:12:24 -08:00
+								    }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    request.nlmsg_flags = flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE;
 								    error = dpif_linux_flow_transact(&request,
 								                                     stats ? &reply : NULL,
 								                                     stats ? &buf : NULL);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    if (!error && stats) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        dpif_linux_flow_get_stats(&reply, stats);
 								        ofpbuf_delete(buf);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    }
 								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								dpif_linux_flow_del(struct dpif *dpif_,
 								                    const struct nlattr *key, size_t key_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                    struct dpif_flow_stats *stats)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_flow request, reply;
 								    struct ofpbuf *buf;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    dpif_linux_flow_init(&request);
 								    request.cmd = ODP_FLOW_DEL;
 								    request.dp_idx = dpif->minor;
 								    request.key = key;
 								    request.key_len = key_len;
 								    error = dpif_linux_flow_transact(&request,
 								                                     stats ? &reply : NULL,
 								                                     stats ? &buf : NULL);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    if (!error && stats) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								        dpif_linux_flow_get_stats(&reply, stats);
 								        ofpbuf_delete(buf);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    }
 								    return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								struct dpif_linux_flow_state {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_flow flow;
 								    struct ofpbuf *buf;
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								    struct dpif_flow_stats stats;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								};
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    *statep = xzalloc(sizeof(struct dpif_linux_flow_state));
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    return 0;
 								}
 								static int
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								                          const struct nlattr **key, size_t *key_len,
 								                          const struct nlattr **actions, size_t *actions_len,
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								                          const struct dpif_flow_stats **stats)
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								    struct dpif_linux_flow_state *state = state_;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct ofpbuf *old_buf = state->buf;
 								    struct dpif_linux_flow request;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    dpif_linux_flow_init(&request);
 								    request.cmd = ODP_FLOW_DUMP;
 								    request.dp_idx = dpif->minor;
 								    request.state = state->flow.state;
 								    error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
 								    ofpbuf_delete(old_buf);
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
 								    if (!error) {
 								        if (key) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								            *key = state->flow.key;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								            *key_len = state->flow.key_len;
 								        }
 								        if (actions) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								            *actions = state->flow.actions;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								            *actions_len = state->flow.actions_len;
 								        }
 								        if (stats) {
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								            dpif_linux_flow_get_stats(&state->flow, &state->stats);
-												dpif: Eliminate "struct odp_flow_stats" from client-visible interface.

Following this commit, "struct odp_flow_stats" is only used in
Linux-specific parts of OVS userspace code.  This allows the actual Linux
datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:11:50 -08:00
+								            *stats = &state->stats;
-												dpif: Eliminate "struct odp_flow" from client-visible interface.

Following this commit, "struct odp_flow" and related data structures are
only used in Linux-specific parts of OVS userspace code.  This allows the
actual Linux datapath interface to evolve more freely.

Reviewed by Justin Pettit.

											
										
										
											2011-01-26 07:03:39 -08:00
+								        }
 								    }
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    return error == ENODEV ? EOF : error;
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								}
 								static int
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_flow_state *state = state_;
 								    ofpbuf_delete(state->buf);
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    free(state);
 								    return 0;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												datapath: Remove implementation of port groups.

The "port group" concept seems like a good one, but it has not been
used very much in userspace so far, so before we commit ourselves to
a frozen API that we must maintain forever, remove it.  We can always
add it back in later as a new kind of vport.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2010-10-08 16:36:13 -07:00
+								dpif_linux_execute(struct dpif *dpif_,
-												vswitchd: Consistently use size_t for action lengths.

Currently the type of the datapath action length is mixture of
size_t and unsigned int.  However, size_t is really defined as an
unsigned long, which causes the build to fail on 64-bit platforms.
This consistently uses size_t.

											
										
										
											2010-12-11 22:51:31 -08:00
+								                   const struct nlattr *actions, size_t actions_len,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								                   const struct ofpbuf *buf)
 								{
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    struct odp_execute execute;
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    memset(&execute, 0, sizeof execute);
-												datapath: Convert ODP_FLOW_* and ODP_EXECUTE to put dp_idx into message.

When the datapath moves to the Netlink protocol it won't have a minor
number to use, so we have to put the dp_idx in the message.

This also changes the kernel implementation of ODP_FLOW_FLUSH to do the
datapath locking inside flush_flows() instead of inside openvswitch_ioctl()
but doesn't change that command's userspace interface, which still passes
a datapath number as the ioctl argument.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-18 16:54:27 -08:00
+								    execute.dp_idx = dpif->minor;
-												datapath: Replace "struct odp_action" by Netlink attributes.

In the medium term, we plan to migrate the datapath to use Netlink as its
communication channel.  In the short term, we need to be able to have
actions with 64-bit arguments but "struct odp_action" only has room for
48 bits.  So this patch shifts to variable-length arguments using Netlink
attributes, which starts in on the Netlink transition and makes 64-bit
arguments possible at the same time.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-10 10:40:58 -08:00
+								    execute.actions = (struct nlattr *) actions;
 								    execute.actions_len = actions_len;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    execute.data = buf->data;
 								    execute.length = buf->size;
 								    return do_ioctl(dpif_, ODP_EXECUTE, &execute);
 								}
 								static int
 								dpif_linux_recv_get_mask(const struct dpif *dpif_, int *listen_mask)
 								{
 								    return do_ioctl(dpif_, ODP_GET_LISTEN_MASK, listen_mask);
 								}
 								static int
 								dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
 								{
 								    return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask);
 								}
-												Initial implementation of sFlow.

Tested very slightly with "ping" and "sflowtool -t | tcpdump -r -".

											
										
										
											2010-01-04 13:08:37 -08:00
+								static int
 								dpif_linux_get_sflow_probability(const struct dpif *dpif_,
 								                                 uint32_t *probability)
 								{
 								    /* Fetches the datapath description for 'dpif_' with
 								     * dpif_linux_dp_get() and, on success, stores its sampling value in
 								     * '*probability', or 0 if 'dp.sampling' is a null pointer.
 								     *
 								     * Returns dpif_linux_dp_get()'s result.  On a nonzero result
 								     * '*probability' is left unmodified.
 								     *
 								     * NOTE(review): 'dp.sampling' presumably points into 'buf', which
 								     * would be why it is read before ofpbuf_delete(buf) -- confirm
 								     * against dpif_linux_dp_get(). */
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux_dp dp;
 								    struct ofpbuf *buf;
 								    int error;
 								    error = dpif_linux_dp_get(dpif_, &dp, &buf);
 								    if (!error) {
 								        *probability = dp.sampling ? *dp.sampling : 0;
 								        ofpbuf_delete(buf);
 								    }
 								    return error;
-												Initial implementation of sFlow.

Tested very slightly with "ping" and "sflowtool -t | tcpdump -r -".

											
										
										
											2010-01-04 13:08:37 -08:00
+								}
 								/* Sends an ODP_DP_SET request for the datapath whose index is the minor
 								 * number of 'dpif_', with the request's 'sampling' member pointing at
 								 * 'probability'.  No reply is requested (both the reply and buffer
 								 * arguments to dpif_linux_dp_transact() are NULL).
 								 *
 								 * Returns dpif_linux_dp_transact()'s result. */
 								static int
 								dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
 								{
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct dpif_linux_dp dp;
 								    dpif_linux_dp_init(&dp);
 								    dp.cmd = ODP_DP_SET;
 								    dp.dp_idx = dpif->minor;
 								    dp.sampling = &probability;
 								    return dpif_linux_dp_transact(&dp, NULL, NULL);
-												Initial implementation of sFlow.

Tested very slightly with "ping" and "sflowtool -t | tcpdump -r -".

											
										
										
											2010-01-04 13:08:37 -08:00
+								}
-												dpif: Abstract translation from OpenFlow queue ID into ODP priority value.

When the QoS code was integrated, I didn't yet know how to abstract the
translation from a queue ID in an OpenFlow OFPAT_ENQUEUE action into a
priority value for an ODP ODPAT_SET_PRIORITY action.  This commit is a
first attempt that works OK for Linux, so far.  It's possible that in fact
this translation needs the 'netdev' as an argument too, but it's not needed
yet.

											
										
										
											2010-07-20 11:23:21 -07:00
+								static int
 								dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
 								                             uint32_t queue_id, uint32_t *priority)
 								{
 								    /* Translates 'queue_id' into a Linux traffic control handle and
 								     * stores it in '*priority'.  'dpif' is not used.
 								     *
 								     * Queue IDs below 0xf000 are accepted: the handle is built with
 								     * TC_H_MAKE from 1 << 16 and 'queue_id' + 1.  The "+ 1" keeps the
 								     * minor number away from 0, so queue 0 maps to minor 1, queue 1 to
 								     * minor 2, and so on.
 								     *
 								     * Returns 0 on success.  For 'queue_id' >= 0xf000 returns EINVAL
 								     * and leaves '*priority' unmodified. */
 								    if (queue_id < 0xf000) {
-												netdev-linux: Avoid minor number 0 in traffic control.

Linux traffic control handles with minor number 0 refer to qdiscs, not
to classes.  This commit deals with this by using a conversion function:
OpenFlow queue 0 maps to minor 1, queue 1 to minor 2, and so on.

											
										
										
											2010-07-16 15:50:57 -07:00
+								        *priority = TC_H_MAKE(1 << 16, queue_id + 1);
-												dpif: Abstract translation from OpenFlow queue ID into ODP priority value.

When the QoS code was integrated, I didn't yet know how to abstract the
translation from a queue ID in an OpenFlow OFPAT_ENQUEUE action into a
priority value for an ODP ODPAT_SET_PRIORITY action.  This commit is a
first attempt that works OK for Linux, so far.  It's possible that in fact
this translation needs the 'netdev' as an argument too, but it's not needed
yet.

											
										
										
											2010-07-20 11:23:21 -07:00
+								        return 0;
 								    } else {
 								        return EINVAL;
 								    }
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall)
 								{
 								    /* Parses the Netlink attributes that follow the 'struct odp_packet'
 								     * header in 'buf' and fills in '*upcall' from them.
 								     *
 								     * Returns 0 on success, or EINVAL if nl_policy_parse() rejects the
 								     * message.  On success 'upcall->packet' is 'buf' itself, with its
 								     * 'data' and 'size' narrowed to the ODP_PACKET_ATTR_PACKET payload;
 								     * 'upcall->key' and 'upcall->actions' are whatever nl_attr_get()
 								     * returns for their attributes (presumably pointers into 'buf''s
 								     * storage -- confirm against netlink.h). */
 								    static const struct nl_policy odp_packet_policy[] = {
 								        /* Always present. */
 								        [ODP_PACKET_ATTR_TYPE] = { .type = NL_A_U32 },
 								        [ODP_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC,
 								                                     .min_len = ETH_HEADER_LEN },
 								        [ODP_PACKET_ATTR_KEY] = { .type = NL_A_NESTED },
 								        /* _ODPL_ACTION_NR only. */
 								        [ODP_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true },
 								        /* _ODPL_SFLOW_NR only. */
 								        [ODP_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true },
 								        [ODP_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
 								    };
 								    struct nlattr *attrs[ARRAY_SIZE(odp_packet_policy)];
 								    if (!nl_policy_parse(buf, sizeof(struct odp_packet),
 								                         odp_packet_policy, attrs,
 								                         ARRAY_SIZE(odp_packet_policy))) {
 								        return EINVAL;
 								    }
 								    /* Zero everything first so that the optional fields below default
 								     * to 0 (or null) when their attributes are absent. */
 								    memset(upcall, 0, sizeof *upcall);
 								    upcall->type = nl_attr_get_u32(attrs[ODP_PACKET_ATTR_TYPE]);
 								    /* Reuse 'buf' as the packet: point it at the packet payload only. */
 								    buf->data = (void *) nl_attr_get(attrs[ODP_PACKET_ATTR_PACKET]);
 								    buf->size = nl_attr_get_size(attrs[ODP_PACKET_ATTR_PACKET]);
 								    upcall->packet = buf;
 								    upcall->key = (void *) nl_attr_get(attrs[ODP_PACKET_ATTR_KEY]);
 								    upcall->key_len = nl_attr_get_size(attrs[ODP_PACKET_ATTR_KEY]);
 								    if (attrs[ODP_PACKET_ATTR_USERDATA]) {
 								        upcall->userdata = nl_attr_get_u64(attrs[ODP_PACKET_ATTR_USERDATA]);
 								    }
 								    if (attrs[ODP_PACKET_ATTR_SAMPLE_POOL]) {
 								        upcall->sample_pool
 								            = nl_attr_get_u32(attrs[ODP_PACKET_ATTR_SAMPLE_POOL]);
 								    }
 								    if (attrs[ODP_PACKET_ATTR_ACTIONS]) {
 								        upcall->actions = (void *) nl_attr_get(attrs[ODP_PACKET_ATTR_ACTIONS]);
 								        upcall->actions_len = nl_attr_get_size(attrs[ODP_PACKET_ATTR_ACTIONS]);
 								    }
 								    return 0;
 								}
 								static int
 								dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct ofpbuf *buf;
 								    int retval;
 								    int error;
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								    buf = ofpbuf_new(65536);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    retval = read(dpif->fd, ofpbuf_tail(buf), ofpbuf_tailroom(buf));
 								    if (retval < 0) {
 								        error = errno;
 								        if (error != EAGAIN) {
 								            VLOG_WARN_RL(&error_rl, "%s: read failed: %s",
 								                         dpif_name(dpif_), strerror(error));
 								        }
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								    } else if (retval >= sizeof(struct odp_packet)) {
 								        struct odp_packet *odp_packet = buf->data;
 								        buf->size += retval;
 								        if (odp_packet->len <= retval) {
 								            error = parse_odp_packet(buf, upcall);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								        } else {
 								            VLOG_WARN_RL(&error_rl, "%s: discarding message truncated "
-												Merge citrix branch into master.

											
										
										
											2009-11-10 15:12:01 -08:00
+								                         "from %"PRIu32" bytes to %d",
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								                         dpif_name(dpif_), odp_packet->len, retval);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								            error = ERANGE;
 								        }
 								    } else if (!retval) {
 								        VLOG_WARN_RL(&error_rl, "%s: unexpected end of file", dpif_name(dpif_));
 								        error = EPROTO;
 								    } else {
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								        VLOG_WARN_RL(&error_rl, "%s: discarding too-short message (%d bytes)",
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								                     dpif_name(dpif_), retval);
 								        error = ERANGE;
 								    }
-												datapath: Report kernel's flow key when passing packets up to userspace.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.

This commit takes one step in that direction by making the kernel report
its idea of the flow that a packet belongs to whenever it passes a packet
up to userspace.  This means that userspace can intelligently figure out
what to do:

   - If userspace's notion of the flow for the packet matches the kernel's,
     then nothing special is necessary.

   - If the kernel has a more specific notion for the flow than userspace,
     for example if the kernel decoded IPv6 headers but userspace stopped
     at the Ethernet type (because it does not understand IPv6), then again
     nothing special is necessary: userspace can still set up the flow in
     the usual way.

   - If userspace has a more specific notion for the flow than the kernel,
     for example if userspace decoded an IPv6 header but the kernel
     stopped at the Ethernet type, then userspace can forward the packet
     manually, without setting up a flow in the kernel.  (This case is
     bad from a performance point of view, but at least it is correct.)

This commit does not actually make userspace flexible enough to handle
changes in the kernel flow key structure, although userspace does now
have enough information to do that intelligently.  This will have to wait
for later commits.

This commit is bigger than it would otherwise be because it is rolled
together with changing "struct odp_msg" to a sequence of Netlink
attributes.  The alternative, to do each of those changes in a separate
patch, seemed like overkill because it meant that either we would have to
introduce and then kill off Netlink attributes for in_port and tun_id, if
Netlink conversion went first, or shove yet another variable-length header
into the stuff already after odp_msg, if adding the flow key to odp_msg
went first.

This commit will slow down performance of checksumming packets sent up to
userspace.  I'm not entirely pleased with how I did it.  I considered a
couple of alternatives, but none of them seemed that much better.
Suggestions welcome.  Not changing anything wasn't an option,
unfortunately.  At any rate some slowdown will become unavoidable when OVS
actually starts using Netlink instead of just Netlink framing.

(Actually, I thought of one option where we could avoid that: make
userspace do the checksum instead, by passing csum_start and csum_offset as
part of what goes to userspace.  But that's not perfect either.)

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-24 14:59:57 -08:00
+								    if (error) {
 								        ofpbuf_delete(buf);
 								    }
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    return error;
 								}
 								static void
 								dpif_linux_recv_wait(struct dpif *dpif_)
 								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    poll_fd_wait(dpif->fd, POLLIN);
 								}
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								static void
 								dpif_linux_recv_purge(struct dpif *dpif_)
 								{
 								    /* Discards pending messages on the datapath fd: performs at most
 								     * DP_N_QUEUES * DP_MAX_QUEUE_LEN one-byte reads and stops at the first
 								     * read() that does not return exactly 1.
 								     *
 								     * The bound is somewhat bogus because it assumes that the macros below
 								     * have fixed values, but this code is expected to go away later. */
 								#define DP_N_QUEUES 3
 								#define DP_MAX_QUEUE_LEN 100
 								    const int fd = dpif_linux_cast(dpif_)->fd;
 								    int remaining = DP_N_QUEUES * DP_MAX_QUEUE_LEN;
 								    while (remaining-- > 0) {
 								        /* Reading even 1 byte discards a whole datagram and saves time. */
 								        char discard;
 								        if (read(fd, &discard, 1) != 1) {
 								            break;
 								        }
 								    }
 								}
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								const struct dpif_class dpif_linux_class = {
-												dpif: Update dpif interface to match netdev.

This brings over some features that were added to the netdev interface,
most notably the separation between the name and the type.  In addition
to being cleaner, this also avoids problems where it is expected that
the local port has the same name as the datapath.

											
										
										
											2010-01-22 14:37:10 -05:00
+								    "system",
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								    NULL,
 								    NULL,
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								    dpif_linux_enumerate,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    dpif_linux_open,
 								    dpif_linux_close,
-												vswitch: Avoid knowledge of details specific to Linux datapaths.

At startup, the vswitch needs to delete datapaths that are not configured
by the administrator.  Until now this was done by knowing the possible
names of Linux datapaths.  This commit cleans up by allowing each
datapath class to enumerate its existing datapaths and their names.

											
										
										
											2009-07-06 11:06:36 -07:00
+								    dpif_linux_get_all_names,
-												Fix some regressions from the merge from master.

											
										
										
											2010-02-08 13:22:41 -05:00
+								    dpif_linux_destroy,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    dpif_linux_get_stats,
 								    dpif_linux_get_drop_frags,
 								    dpif_linux_set_drop_frags,
 								    dpif_linux_port_add,
 								    dpif_linux_port_del,
 								    dpif_linux_port_query_by_number,
 								    dpif_linux_port_query_by_name,
-												datapath: Drop port information from odp_stats.

As with n_flows, n_ports was used regularly by userspace to determine how
much memory to allocate when listing ports, but it is no longer needed for
that.  max_ports, on the other hand, is necessary but it is also a fixed
value for the kernel datapath right now and if we expand it we can also
come up with a way to report the expanded value.

The remaining members of odp_stats are actually real statistics that I
intend to keep.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 09:24:59 -08:00
+								    dpif_linux_get_max_ports,
-												datapath: Change listing ports to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.  In
turn, that means that the odp_port structure must become variable-length.
This does not, however, fit in well with the ODP_PORT_LIST ioctl in its
current form, because that would require userspace to know how much space
to allocate for each port in advance, or to allocate as much space as
could possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_PORT_LIST
by a new ioctl ODP_VPORT_DUMP that retrieves information about a single
vport from the datapath on each call.  It is much cleaner to allocate the
maximum amount of space for a single vport than to do so for possibly a
large number of vports.

It would be faster to retrieve a number of vports in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

The Netlink version won't need to take the starting port number from
userspace, since Netlink sockets can keep track of that state as part
of their "dump" feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-10 13:12:12 -08:00
+								    dpif_linux_port_dump_start,
 								    dpif_linux_port_dump_next,
 								    dpif_linux_port_dump_done,
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    dpif_linux_port_poll,
 								    dpif_linux_port_poll_wait,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    dpif_linux_flow_get,
 								    dpif_linux_flow_put,
 								    dpif_linux_flow_del,
 								    dpif_linux_flow_flush,
-												datapath: Change listing flows to use an iterator concept.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to change
the kernel's idea of the flow key separately from the userspace version.
In turn, that means that flow keys must become variable-length.  This does
not, however, fit in well with the ODP_FLOW_LIST ioctl in its current form,
because that would require userspace to know how much space to allocate
for each flow's key in advance, or to allocate as much space as could
possibly be needed.  Neither choice is very attractive.

This commit prepares for a different solution, by replacing ODP_FLOW_LIST
by a new ioctl ODP_FLOW_DUMP that retrieves a single flow from the datapath
on each call.  It is much cleaner to allocate the maximum amount of space
for a single flow key than to do so for possibly a very large number of
flow keys.

As a side effect, this patch also fixes a race condition that sometimes
made "ovs-dpctl dump-flows" print an error: previously, flows were listed
and then their actions were retrieved, which left a window in which
ovs-vswitchd could delete the flow.  Now dumping a flow and its actions is
a single step, closing that window.

Dumping all of the flows in a datapath is no longer an atomic step, so now
it is possible to miss some flows or see a single flow twice during
iteration, if the flow table is modified by another process.  It doesn't
look like this should be a problem for ovs-vswitchd.

It would be faster to retrieve a number of flows in batch instead of just
one at a time, but that will naturally happen later when the kernel
datapath interface is changed to use Netlink, so this patch does not bother
with it.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-28 10:39:52 -08:00
+								    dpif_linux_flow_dump_start,
 								    dpif_linux_flow_dump_next,
 								    dpif_linux_flow_dump_done,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    dpif_linux_execute,
 								    dpif_linux_recv_get_mask,
 								    dpif_linux_recv_set_mask,
-												Initial implementation of sFlow.

Tested very slightly with "ping" and "sflowtool -t | tcpdump -r -".

											
										
										
											2010-01-04 13:08:37 -08:00
+								    dpif_linux_get_sflow_probability,
 								    dpif_linux_set_sflow_probability,
-												dpif: Abstract translation from OpenFlow queue ID into ODP priority value.

When the QoS code was integrated, I didn't yet know how to abstract the
translation from a queue ID in an OpenFlow OFPAT_ENQUEUE action into a
priority value for an ODP ODPAT_SET_PRIORITY action.  This commit is a
first attempt that works OK for Linux, so far.  It's possible that in fact
this translation needs the 'netdev' as an argument too, but it's not needed
yet.

											
										
										
											2010-07-20 11:23:21 -07:00
+								    dpif_linux_queue_to_priority,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    dpif_linux_recv,
 								    dpif_linux_recv_wait,
-												datapath: Drop queue information from odp_stats.

This queue information will be available through the kernel socket layer
once we move over to Netlink sockets as transports, so we might as well get
rid of the redundancy.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-04 17:00:36 -08:00
+								    dpif_linux_recv_purge,
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								};
 								static int get_openvswitch_major(void);
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								static int get_major(const char *target);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
 								/* Issues ioctl request 'cmd' with argument 'arg' on the file descriptor
 								 * held by 'dpif_'.  Returns 0 if ioctl() returns zero, otherwise the
 								 * current value of errno. */
 								static int
 								do_ioctl(const struct dpif *dpif_, int cmd, const void *arg)
 								{
 								    struct dpif_linux *linux_dpif = dpif_linux_cast(dpif_);
 								
 								    if (ioctl(linux_dpif->fd, cmd, arg)) {
 								        return errno;
 								    }
 								    return 0;
 								}
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								bool
 								dpif_linux_is_internal_device(const char *name)
-												datapath: Drop datapath index and port number from Ethtool output.

I introduced this a long time ago as an efficient way for userspace to find
out whether and where an internal device was attached, but I've always
considered it an ugly kluge.  Now that ODP_VPORT_QUERY can fetch a vport's
info regardless of datapath, it is no longer necessary.  This commit
stops using Ethtool for this purpose and drops the feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-29 14:20:16 -08:00
+								{
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    struct dpif_linux_vport reply;
 								    struct ofpbuf *buf;
-												datapath: Drop datapath index and port number from Ethtool output.

I introduced this a long time ago as an efficient way for userspace to find
out whether and where an internal device was attached, but I've always
considered it an ugly kluge.  Now that ODP_VPORT_QUERY can fetch a vport's
info regardless of datapath, it is no longer necessary.  This commit
stops using Ethtool for this purpose and drops the feature.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-12-29 14:20:16 -08:00
+								    int error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    error = dpif_linux_vport_get(name, &reply, &buf);
 								    if (!error) {
 								        ofpbuf_delete(buf);
 								    } else if (error != ENODEV) {
 								        VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)",
 								                     name, strerror(error));
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    return reply.type == ODP_VPORT_TYPE_INTERNAL;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
 								make_openvswitch_device(int minor, char **fnp)
 								{
 								    const char dirname[] = "/dev/net";
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    int major;
 								    dev_t dev;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    struct stat s;
 								    char fn[128];
-												dpif-linux: Always set *fnp in make_openvswitch_device().

Some versions of GCC warn about this.  Always initializing it seems like
the right thing to do, since we "almost always" initialized it before.

Reported-by: Neil McKee <neil.mckee@inmon.com>

											
										
										
											2010-01-19 10:10:52 -08:00
+								    *fnp = NULL;
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    major = get_openvswitch_major();
 								    if (major < 0) {
 								        return -major;
 								    }
 								    dev = makedev(major, minor);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    sprintf(fn, "%s/dp%d", dirname, minor);
 								    if (!stat(fn, &s)) {
 								        if (!S_ISCHR(s.st_mode)) {
 								            VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
 								                         fn);
 								        } else if (s.st_rdev != dev) {
 								            VLOG_WARN_RL(&error_rl,
-												dpif-linux: Clarify bad device warning message

The message warning that the device number is wrong for the Open vSwitch
devices could have been clearer.

Thanks to Ben Pfaff for the suggested wording.

											
										
										
											2009-10-02 16:59:28 -07:00
+								                         "%s is device %u:%u but should be %u:%u, fixing",
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								                         fn, major(s.st_rdev), minor(s.st_rdev),
 								                         major(dev), minor(dev));
 								        } else {
 								            goto success;
 								        }
 								        /* The existing node is wrong (not a character device, or wrong
 								         * device number): remove it so that it can be recreated below.
 								         *
 								         * On every failure path in this function errno is copied into a
 								         * local before logging, because the logging call may itself
 								         * modify errno and the caller must get the original error. */
 								        if (unlink(fn)) {
 								            int error = errno;
 								            VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
 								                         fn, strerror(error));
 								            return error;
 								        }
 								    } else if (errno == ENOENT) {
 								        /* The node does not exist.  Make sure that its directory does. */
 								        if (stat(dirname, &s)) {
 								            if (errno == ENOENT) {
 								                if (mkdir(dirname, 0755)) {
 								                    int error = errno;
 								                    VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
 								                                 dirname, strerror(error));
 								                    return error;
 								                }
 								            } else {
 								                int error = errno;
 								                VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
 								                             dirname, strerror(error));
 								                return error;
 								            }
 								        }
 								    } else {
 								        int error = errno;
 								        VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(error));
 								        return error;
 								    }
 								    /* The device needs to be created. */
 								    if (mknod(fn, S_IFCHR | 0700, dev)) {
 								        int error = errno;
 								        VLOG_WARN_RL(&error_rl,
 								                     "%s: creating character device %u:%u failed (%s)",
 								                     fn, major(dev), minor(dev), strerror(error));
 								        return error;
 								    }
 								success:
 								    *fnp = xstrdup(fn);
 								    return 0;
 								}
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								/* Return the major device number of the Open vSwitch device.  If it
 								 * cannot be determined, a negative errno is returned. */
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								static int
 								get_openvswitch_major(void)
 								{
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    static int openvswitch_major = -1;
 								    if (openvswitch_major < 0) {
 								        openvswitch_major = get_major("openvswitch");
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
 								    return openvswitch_major;
 								}
 								static int
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								get_major(const char *target)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
 								    const char fn[] = "/proc/devices";
 								    char line[128];
 								    FILE *file;
 								    int ln;
 								    file = fopen(fn, "r");
 								    if (!file) {
 								        VLOG_ERR("opening %s failed (%s)", fn, strerror(errno));
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								        return -errno;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
 								    /* Scan the file line by line; 'ln' is the 1-based line number used in
 								     * diagnostics. */
 								    for (ln = 1; fgets(line, sizeof line, file); ln++) {
 								        char name[64];
 								        int major;
 								        if (!strncmp(line, "Character", 9)
 								            || line[0] == '\0' || line[0] == '\n') {
 								            /* Section header or blank line: nothing to do.  fgets() keeps
 								             * the trailing newline, so a blank line reads as "\n". */
 								        } else if (!strncmp(line, "Block", 5)) {
 								            /* We only want character devices, so skip the rest of the file. */
 								            break;
 								        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
 								            /* Both fields were converted, so 'name' is initialized and
 								             * safe to compare.  (A bare truth test would also accept a
 								             * return of 1 or EOF, leaving 'name' unset.) */
 								            if (!strcmp(name, target)) {
 								                fclose(file);
 								                return major;
 								            }
 								        } else {
-												vlog: Add VLOG_WARN_ONCE() and similar macros.

											
										
										
											2010-09-13 13:29:57 -07:00
+								            VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								        }
 								    }
-												dpif-linux: Fix file descriptor leak.

get_major() opens /proc/devices to get the openvswitch major number
but never closes the FD.

											
										
										
											2010-03-25 10:54:15 -04:00
+								    fclose(file);
-												dpif-linux: Fail earlier if OVS kernel module isn't loaded

When the kernel module isn't loaded, the bridge tries to open all the
possible minor devices, regardless.  This change first checks that there
is a major device number for Open vSwitch and only then tries to open the
minor devices.

This change also removes the assumption that there's a default Open vSwitch
major device number, since the kernel module always attempts to get a
dynamic one.  Maybe one day we'll have one...

Bug #1179

											
										
										
											2009-10-02 15:20:12 -07:00
+								    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
 								    return -ENODEV;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
 								static int
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								open_minor(int minor, int *fdp)
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								{
 								    int error;
 								    char *fn;
 								    error = make_openvswitch_device(minor, &fn);
 								    if (error) {
 								        return error;
 								    }
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    *fdp = open(fn, O_RDONLY | O_NONBLOCK);
 								    if (*fdp < 0) {
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								        error = errno;
 								        VLOG_WARN("%s: open failed (%s)", fn, strerror(error));
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								        free(fn);
 								        return error;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								    }
 								    free(fn);
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    return 0;
-												dpif: Make dpifs abstract, to allow multiple datapath implementations.

This commit initially introduces only a single datapath implementation,
which is the same as the original one, but it paves the way for
additional implementations, such as the upcoming userspace datapath.

											
										
										
											2009-06-17 14:35:35 -07:00
+								}
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
 								static void
-												rtnetlink: Remove LINK specific messages from rtnetlink

Abstracted rtnetlink so that it may be used for messages other than
RTM LINK messages.  Created a new rtnetlink-link module which
specifically deals with these kinds of messages and follows the old
rtnetlink API.

											
										
										
											2010-12-21 13:44:37 -08:00
+								dpif_linux_port_changed(const struct rtnetlink_link_change *change,
 								                        void *dpif_)
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented;
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								{
 								    struct dpif_linux *dpif = dpif_;
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								    if (change) {
 								        if (change->master_ifindex == dpif->local_ifindex
 								            && (change->nlmsg_type == RTM_NEWLINK
 								                || change->nlmsg_type == RTM_DELLINK))
 								        {
 								            /* Our datapath changed, either adding a new port or deleting an
 								             * existing one. */
-												dpif-linux: Use hash instead of sorted array.

With 1000 network devices being added or removed, sorting the array was a
profiling hot spot.  Using a hash makes it drop off the profile.

											
										
										
											2010-05-03 13:47:28 -07:00
+								            shash_add_once(&dpif->changed_ports, change->ifname, NULL);
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								        }
-												netdev: Implement an abstract interface to network devices.

This new abstraction layer allows multiple implementations of network
devices in a single running process.  This will be useful, for example, to
support network devices that are simulated entirely in the running process
or that communicate with other processes over Unix domain sockets, etc.

The reimplemented tap device support in this commit has not been tested.

											
										
										
											2009-07-30 16:04:45 -07:00
+								    } else {
 								        dpif->change_error = true;
-												Introduce general-purpose ways to wait for dpif and netdev changes.

The dpif and netdev code has had various ways to check for changes to
dpifs and netdevs over the course of Open vSwitch development.  All of
these have been thus far fairly specific to the Linux implementation.  This
commit is the start of a more general API for watching for such changes.
The dpif-related parts seem fairly mature and so they are documented,
the netdev parts will probably need to change somewhat and so they are
not documented yet.

											
										
										
											2009-06-24 10:24:09 -07:00
+								    }
 								}
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
 								/* Stores in '*dp0_fdp' a file descriptor obtained from open_minor(0, ...).
 								 * The descriptor is kept in a function-local static once it has been opened,
 								 * so later calls reuse it instead of opening it again.  Returns 0 on success;
 								 * otherwise returns the error from open_minor() and leaves '*dp0_fdp'
 								 * unmodified. */
 								static int
 								get_dp0_fd(int *dp0_fdp)
 								{
 								    static int cached_fd = -1;
 								    if (cached_fd < 0) {
 								        int new_fd;
 								        int error = open_minor(0, &new_fd);
 								        if (error) {
 								            return error;
 								        }
 								        cached_fd = new_fd;
 								    }
 								    *dp0_fdp = cached_fd;
 								    return 0;
 								}
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
 								/* Decodes 'buf', a "struct odp_vport" header followed by Netlink attributes,
 								 * into 'vport'.  Returns 0 on success, or EINVAL if the attributes do not
 								 * satisfy the policy below.
 								 *
 								 * 'vport' is always reinitialized first.  On success it holds pointers into
 								 * 'buf', so 'buf' must outlive any use of 'vport'. */
 								static int
 								dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport,
 								                             const struct ofpbuf *buf)
 								{
 								    /* Port number, type and name are mandatory; everything else is
 								     * optional. */
 								    static const struct nl_policy odp_vport_policy[] = {
 								        [ODP_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
 								        [ODP_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
 								        [ODP_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
 								        [ODP_VPORT_ATTR_STATS] = { .type = NL_A_UNSPEC,
 								                                   .min_len = sizeof(struct rtnl_link_stats64),
 								                                   .max_len = sizeof(struct rtnl_link_stats64),
 								                                   .optional = true },
 								        [ODP_VPORT_ATTR_ADDRESS] = { .type = NL_A_UNSPEC,
 								                                     .min_len = ETH_ADDR_LEN,
 								                                     .max_len = ETH_ADDR_LEN,
 								                                     .optional = true },
 								        [ODP_VPORT_ATTR_MTU] = { .type = NL_A_U32, .optional = true },
 								        [ODP_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
 								        [ODP_VPORT_ATTR_IFINDEX] = { .type = NL_A_U32, .optional = true },
 								        [ODP_VPORT_ATTR_IFLINK] = { .type = NL_A_U32, .optional = true },
 								    };
 								    struct nlattr *attrs[ARRAY_SIZE(odp_vport_policy)];
 								    struct odp_vport *hdr;
 								    dpif_linux_vport_init(vport);
 								    if (!nl_policy_parse(buf, sizeof *hdr, odp_vport_policy,
 								                         attrs, ARRAY_SIZE(odp_vport_policy))) {
 								        return EINVAL;
 								    }
 								    /* Fixed header. */
 								    hdr = buf->data;
 								    vport->dp_idx = hdr->dp_idx;
 								    /* Mandatory attributes. */
 								    vport->port_no = nl_attr_get_u32(attrs[ODP_VPORT_ATTR_PORT_NO]);
 								    vport->type = nl_attr_get_u32(attrs[ODP_VPORT_ATTR_TYPE]);
 								    vport->name = nl_attr_get_string(attrs[ODP_VPORT_ATTR_NAME]);
 								    /* Optional attributes: absent ones keep the values set by
 								     * dpif_linux_vport_init(). */
 								    if (attrs[ODP_VPORT_ATTR_STATS]) {
 								        vport->stats = nl_attr_get(attrs[ODP_VPORT_ATTR_STATS]);
 								    }
 								    if (attrs[ODP_VPORT_ATTR_ADDRESS]) {
 								        vport->address = nl_attr_get(attrs[ODP_VPORT_ATTR_ADDRESS]);
 								    }
 								    if (attrs[ODP_VPORT_ATTR_MTU]) {
 								        vport->mtu = nl_attr_get_u32(attrs[ODP_VPORT_ATTR_MTU]);
 								    }
 								    if (attrs[ODP_VPORT_ATTR_OPTIONS]) {
 								        vport->options = nl_attr_get(attrs[ODP_VPORT_ATTR_OPTIONS]);
 								        vport->options_len = nl_attr_get_size(attrs[ODP_VPORT_ATTR_OPTIONS]);
 								    }
 								    if (attrs[ODP_VPORT_ATTR_IFINDEX]) {
 								        vport->ifindex = nl_attr_get_u32(attrs[ODP_VPORT_ATTR_IFINDEX]);
 								    }
 								    if (attrs[ODP_VPORT_ATTR_IFLINK]) {
 								        vport->iflink = nl_attr_get_u32(attrs[ODP_VPORT_ATTR_IFLINK]);
 								    }
 								    return 0;
 								}
 								/* Appends to 'buf' (which must initially be empty) a "struct odp_vport"
 								 * followed by Netlink attributes corresponding to 'vport'. */
 								static void
 								dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport,
 								                           struct ofpbuf *buf)
 								{
 								    struct odp_vport *odp_vport;
 								    ofpbuf_reserve(buf, sizeof odp_vport);
 								    if (vport->port_no != UINT32_MAX) {
 								        nl_msg_put_u32(buf, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
 								    }
 								    if (vport->type != ODP_VPORT_TYPE_UNSPEC) {
 								        nl_msg_put_u32(buf, ODP_VPORT_ATTR_TYPE, vport->type);
 								    }
 								    if (vport->name) {
 								        nl_msg_put_string(buf, ODP_VPORT_ATTR_NAME, vport->name);
 								    }
 								    if (vport->stats) {
 								        nl_msg_put_unspec(buf, ODP_VPORT_ATTR_STATS,
 								                          vport->stats, sizeof *vport->stats);
 								    }
 								    if (vport->address) {
 								        nl_msg_put_unspec(buf, ODP_VPORT_ATTR_ADDRESS,
 								                          vport->address, ETH_ADDR_LEN);
 								    }
 								    if (vport->mtu) {
 								        nl_msg_put_u32(buf, ODP_VPORT_ATTR_MTU, vport->mtu);
 								    }
 								    if (vport->options) {
 								        nl_msg_put_nested(buf, ODP_VPORT_ATTR_OPTIONS,
 								                          vport->options, vport->options_len);
 								    }
 								    if (vport->ifindex) {
 								        nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFINDEX, vport->ifindex);
 								    }
 								    if (vport->iflink) {
 								        nl_msg_put_u32(buf, ODP_VPORT_ATTR_IFLINK, vport->iflink);
 								    }
 								    odp_vport = ofpbuf_push_uninit(buf, sizeof *odp_vport);
 								    odp_vport->dp_idx = vport->dp_idx;
 								    odp_vport->len = buf->size;
 								    odp_vport->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
 								}
 								/* Resets 'vport' to its "empty" state: every byte is zeroed, then 'port_no'
 								 * and 'dp_idx' are set to UINT32_MAX. */
 								void
 								dpif_linux_vport_init(struct dpif_linux_vport *vport)
 								{
 								    memset(vport, 0, sizeof *vport);
 								    vport->port_no = UINT32_MAX;
 								    vport->dp_idx = UINT32_MAX;
 								}
 								/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 								 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 								 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 								 * result of the command is expected to be an odp_vport also, which is decoded
 								 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 								 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
 								int
 								dpif_linux_vport_transact(const struct dpif_linux_vport *request,
 								                          struct dpif_linux_vport *reply,
 								                          struct ofpbuf **bufp)
 								{
 								    struct ofpbuf *buf = NULL;
 								    int error;
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    int fd;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
 								    assert((reply != NULL) == (bufp != NULL));
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    error = get_dp0_fd(&fd);
 								    if (error) {
 								        goto error;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    }
 								    buf = ofpbuf_new(1024);
 								    dpif_linux_vport_to_ofpbuf(request, buf);
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
+								    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00
+								    if (error) {
 								        goto error;
 								    }
 								    if (bufp) {
 								        buf->size = ((struct odp_vport *) buf->data)->len;
 								        error = dpif_linux_vport_from_ofpbuf(reply, buf);
 								        if (error) {
 								            goto error;
 								        }
 								        *bufp = buf;
 								    } else {
 								        ofpbuf_delete(buf);
 								    }
 								    return 0;
 								error:
 								    ofpbuf_delete(buf);
 								    if (bufp) {
 								        memset(reply, 0, sizeof *reply);
 								        *bufp = NULL;
 								    }
 								    return error;
 								}
 								/* Obtains information about the kernel vport named 'name' and stores it into
 								 * '*reply' and '*bufp'.  The caller must free '*bufp' when the reply is no
 								 * longer needed ('reply' will contain pointers into '*bufp').  */
 								int
 								dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply,
 								                     struct ofpbuf **bufp)
 								{
 								    struct dpif_linux_vport request;
 								    dpif_linux_vport_init(&request);
 								    request.cmd = ODP_VPORT_GET;
 								    request.name = name;
 								    return dpif_linux_vport_transact(&request, reply, bufp);
 								}
-												datapath: Convert datapath operations to use Netlink framing.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 15:42:00 -08:00
 								/* Decodes 'buf', a "struct odp_datapath" header followed by Netlink
 								 * attributes, into 'dp'.  Returns 0 on success, or EINVAL if the attributes do
 								 * not satisfy the policy below.
 								 *
 								 * 'dp' is always reinitialized first.  On success it holds pointers into
 								 * 'buf', so 'buf' must outlive any use of 'dp'. */
 								static int
 								dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf)
 								{
 								    /* Only the name is mandatory. */
 								    static const struct nl_policy odp_datapath_policy[] = {
 								        [ODP_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
 								        [ODP_DP_ATTR_STATS] = { .type = NL_A_UNSPEC,
 								                                .min_len = sizeof(struct odp_stats),
 								                                .max_len = sizeof(struct odp_stats),
 								                                .optional = true },
 								        [ODP_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true },
 								        [ODP_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true },
 								    };
 								    struct nlattr *attrs[ARRAY_SIZE(odp_datapath_policy)];
 								    struct odp_datapath *hdr;
 								    dpif_linux_dp_init(dp);
 								    if (!nl_policy_parse(buf, sizeof *hdr, odp_datapath_policy,
 								                         attrs, ARRAY_SIZE(odp_datapath_policy))) {
 								        return EINVAL;
 								    }
 								    hdr = buf->data;
 								    dp->dp_idx = hdr->dp_idx;
 								    dp->name = nl_attr_get_string(attrs[ODP_DP_ATTR_NAME]);
 								    if (attrs[ODP_DP_ATTR_STATS]) {
 								        /* Copy byte-wise rather than by structure assignment: Netlink
 								         * does not guarantee the alignment that the 64-bit members
 								         * need. */
 								        memcpy(&dp->stats, nl_attr_get(attrs[ODP_DP_ATTR_STATS]),
 								               sizeof dp->stats);
 								    }
 								    if (attrs[ODP_DP_ATTR_IPV4_FRAGS]) {
 								        dp->ipv4_frags = nl_attr_get_u32(attrs[ODP_DP_ATTR_IPV4_FRAGS]);
 								    }
 								    if (attrs[ODP_DP_ATTR_SAMPLING]) {
 								        dp->sampling = nl_attr_get(attrs[ODP_DP_ATTR_SAMPLING]);
 								    }
 								    return 0;
 								}
 								/* Appends to 'buf' (which must initially be empty) a "struct odp_datapath"
 								 * followed by Netlink attributes corresponding to 'dp'.
 								 *
 								 * Only 'name', 'ipv4_frags' and 'sampling' are serialized, and each only
 								 * when it is nonzero/nonnull. */
 								static void
 								dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
 								{
 								    struct odp_datapath *odp_dp;
 								    /* Reserve headroom for the whole header structure (not merely for a
 								     * pointer to it); the header is pushed in front of the attributes
 								     * below. */
 								    ofpbuf_reserve(buf, sizeof *odp_dp);
 								    if (dp->name) {
 								        nl_msg_put_string(buf, ODP_DP_ATTR_NAME, dp->name);
 								    }
 								    /* Skip ODP_DP_ATTR_STATS since we never have a reason to serialize it. */
 								    if (dp->ipv4_frags) {
 								        nl_msg_put_u32(buf, ODP_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags);
 								    }
 								    if (dp->sampling) {
 								        nl_msg_put_u32(buf, ODP_DP_ATTR_SAMPLING, *dp->sampling);
 								    }
 								    /* Prepend the fixed header now that the attribute payload is complete.
 								     * 'len' is the number of bytes in use; 'total_len' is the distance from
 								     * the start of the data to ofpbuf_end(). */
 								    odp_dp = ofpbuf_push_uninit(buf, sizeof *odp_dp);
 								    odp_dp->dp_idx = dp->dp_idx;
 								    odp_dp->len = buf->size;
 								    odp_dp->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
 								}
 								/* Resets 'dp' to its "empty" state: every byte is zeroed, then 'dp_idx' (a
 								 * uint32_t) is set to all-ones. */
 								void
 								dpif_linux_dp_init(struct dpif_linux_dp *dp)
 								{
 								    memset(dp, 0, sizeof *dp);
 								    dp->dp_idx = UINT32_MAX;
 								}
 								/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 								 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 								 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 								 * result of the command is expected to be an odp_datapath also, which is
 								 * decoded and stored in '*reply' and '*bufp'.  The caller must free '*bufp'
 								 * when the reply is no longer needed ('reply' will contain pointers into
 								 * '*bufp'). */
 								int
 								dpif_linux_dp_transact(const struct dpif_linux_dp *request,
 								                       struct dpif_linux_dp *reply, struct ofpbuf **bufp)
 								{
 								    struct ofpbuf *buf = NULL;
 								    int error;
 								    int fd;
 								    assert((reply != NULL) == (bufp != NULL));
 								    error = get_dp0_fd(&fd);
 								    if (error) {
 								        goto error;
 								    }
 								    buf = ofpbuf_new(1024);
 								    dpif_linux_dp_to_ofpbuf(request, buf);
 								    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
 								    if (error) {
 								        goto error;
 								    }
 								    if (bufp) {
 								        buf->size = ((struct odp_datapath *) buf->data)->len;
 								        error = dpif_linux_dp_from_ofpbuf(reply, buf);
 								        if (error) {
 								            goto error;
 								        }
 								        *bufp = buf;
 								    } else {
 								        ofpbuf_delete(buf);
 								    }
 								    return 0;
 								error:
 								    ofpbuf_delete(buf);
 								    if (bufp) {
 								        memset(reply, 0, sizeof *reply);
 								        *bufp = NULL;
 								    }
 								    return error;
 								}
 								/* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'.
 								 * The caller must free '*bufp' when the reply is no longer needed ('reply'
 								 * will contain pointers into '*bufp').  */
 								int
 								dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
 								                  struct ofpbuf **bufp)
 								{
 								    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
 								    struct dpif_linux_dp request;
 								    dpif_linux_dp_init(&request);
 								    request.cmd = ODP_DP_GET;
 								    request.dp_idx = dpif->minor;
 								    return dpif_linux_dp_transact(&request, reply, bufp);
 								}
 								/* Parses the contents of 'buf', which contains a "struct odp_flow" followed by
 								 * Netlink attributes, into 'flow'.  Returns 0 if successful, otherwise a
 								 * positive errno value (EINVAL if the attributes do not satisfy the policy
 								 * below).
 								 *
 								 * 'flow' is always reinitialized first.  On success 'flow' will contain
 								 * pointers into 'buf', so the caller should not free 'buf' while 'flow' is
 								 * still in use. */
 								static int
 								dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
 								                            const struct ofpbuf *buf)
 								{
 								    /* Only the key is mandatory. */
 								    static const struct nl_policy odp_flow_policy[] = {
 								        [ODP_FLOW_ATTR_KEY] = { .type = NL_A_NESTED },
 								        [ODP_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
 								        [ODP_FLOW_ATTR_STATS] = { .type = NL_A_UNSPEC,
 								                                  .min_len = sizeof(struct odp_flow_stats),
 								                                  .max_len = sizeof(struct odp_flow_stats),
 								                                  .optional = true },
 								        [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
 								        [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
 								        /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
 								        [ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
 								    };
 								    struct odp_flow *odp_flow;
 								    struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];
 								    dpif_linux_flow_init(flow);
 								    if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy,
 								                         a, ARRAY_SIZE(odp_flow_policy))) {
 								        return EINVAL;
 								    }
 								    odp_flow = buf->data;
 								    flow->nlmsg_flags = odp_flow->nlmsg_flags;
 								    flow->dp_idx = odp_flow->dp_idx;
 								    flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
 								    flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);
 								    if (a[ODP_FLOW_ATTR_ACTIONS]) {
 								        flow->actions = nl_attr_get(a[ODP_FLOW_ATTR_ACTIONS]);
 								        flow->actions_len = nl_attr_get_size(a[ODP_FLOW_ATTR_ACTIONS]);
 								    }
 								    if (a[ODP_FLOW_ATTR_STATS]) {
 								        flow->stats = nl_attr_get(a[ODP_FLOW_ATTR_STATS]);
 								    }
 								    if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
 								        flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
 								    }
 								    if (a[ODP_FLOW_ATTR_USED]) {
 								        /* Keep a pointer to the "used" value so that
 								         * dpif_linux_flow_get_stats() can report it; without this the
 								         * attribute would be validated by the policy and then dropped. */
 								        flow->used = nl_attr_get(a[ODP_FLOW_ATTR_USED]);
 								    }
 								    if (a[ODP_FLOW_ATTR_STATE]) {
 								        flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
 								    }
 								    return 0;
 								}
 								/* Appends to 'buf' (which must initially be empty) a "struct odp_flow"
 								 * followed by Netlink attributes corresponding to 'flow'.
 								 *
 								 * 'flow->stats', 'flow->tcp_flags' and 'flow->used' must all be null: they
 								 * are reply-only members and this function asserts that they are unset. */
 								static void
 								dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
 								                          struct ofpbuf *buf)
 								{
 								    struct odp_flow *odp_flow;
 								    /* Reserve headroom for the whole header structure (not merely for a
 								     * pointer to it); the header is pushed in front of the attributes
 								     * below. */
 								    ofpbuf_reserve(buf, sizeof *odp_flow);
 								    if (flow->key_len) {
 								        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
 								    }
 								    if (flow->actions_len) {
 								        nl_msg_put_unspec(buf, ODP_FLOW_ATTR_ACTIONS,
 								                          flow->actions, flow->actions_len);
 								    }
 								    /* We never need to send these to the kernel. */
 								    assert(!flow->stats);
 								    assert(!flow->tcp_flags);
 								    assert(!flow->used);
 								    if (flow->clear) {
 								        nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
 								    }
 								    if (flow->state) {
 								        nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
 								                       get_unaligned_u64(flow->state));
 								    }
 								    /* Prepend the fixed header now that the attribute payload is complete.
 								     * 'len' is the number of bytes in use; 'total_len' is the distance from
 								     * the start of the data to ofpbuf_end(). */
 								    odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
 								    odp_flow->nlmsg_flags = flow->nlmsg_flags;
 								    odp_flow->dp_idx = flow->dp_idx;
 								    odp_flow->len = buf->size;
 								    odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
 								}
 								/* Resets 'flow' to its "empty" state, which for a flow is simply all-zero
 								 * bytes. */
 								void
 								dpif_linux_flow_init(struct dpif_linux_flow *flow)
 								{
 								    memset(flow, 0, sizeof(*flow));
 								}
 								/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 								 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 								 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 								 * result of the command is expected to be an odp_flow also, which is decoded
 								 * and stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the
 								 * reply is no longer needed ('reply' will contain pointers into '*bufp'). */
 								int
 								dpif_linux_flow_transact(const struct dpif_linux_flow *request,
 								                         struct dpif_linux_flow *reply, struct ofpbuf **bufp)
 								{
 								    struct ofpbuf *buf = NULL;
 								    int error;
 								    int fd;
 								    assert((reply != NULL) == (bufp != NULL));
 								    error = get_dp0_fd(&fd);
 								    if (error) {
 								        goto error;
 								    }
 								    buf = ofpbuf_new(1024);
 								    dpif_linux_flow_to_ofpbuf(request, buf);
 								    error = ioctl(fd, request->cmd, buf->data) ? errno : 0;
 								    if (error) {
 								        goto error;
 								    }
 								    if (bufp) {
 								        buf->size = ((struct odp_flow *) buf->data)->len;
 								        error = dpif_linux_flow_from_ofpbuf(reply, buf);
 								        if (error) {
 								            goto error;
 								        }
 								        *bufp = buf;
 								    } else {
 								        ofpbuf_delete(buf);
 								    }
 								    return 0;
 								error:
 								    ofpbuf_delete(buf);
 								    if (bufp) {
 								        memset(reply, 0, sizeof *reply);
 								        *bufp = NULL;
 								    }
 								    return error;
 								}
 								/* Fills in 'stats' from the optional reply members of 'flow'.  Each member
 								 * that 'flow' does not carry (null pointer) is reported as 0.  The 64-bit
 								 * values are read with get_unaligned_u64(). */
 								static void
 								dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow,
 								                          struct dpif_flow_stats *stats)
 								{
 								    stats->n_packets = 0;
 								    stats->n_bytes = 0;
 								    if (flow->stats) {
 								        stats->n_packets = get_unaligned_u64(&flow->stats->n_packets);
 								        stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes);
 								    }
 								    if (flow->used) {
 								        stats->used = get_unaligned_u64(flow->used);
 								    } else {
 								        stats->used = 0;
 								    }
 								    if (flow->tcp_flags) {
 								        stats->tcp_flags = *flow->tcp_flags;
 								    } else {
 								        stats->tcp_flags = 0;
 								    }
 								}
-												datapath: Change userspace vport interface to use Netlink attributes.

One of the goals for Open vSwitch is to decouple kernel and userspace
software, so that either one can be upgraded or rolled back independent of
the other.  To do this in full generality, it must be possible to add new
features to the kernel vport layer without changing userspace software.
The customary way to do this in the Linux networking stack is to use
Netlink and in particular Netlink attributes.  This commit adopts that
model for the vport layer.  It does not yet actually start using the
Netlink socket layer, which will come later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2011-01-26 12:28:59 -08:00