2
0
mirror of https://github.com/openvswitch/ovs synced 2025-10-25 15:07:05 +00:00

datapath: Convert ODP_FLOW_* commands to use AF_NETLINK socket layer.

This completes the transition to the Generic Netlink interface, and
so this commit restores support for Linux 2.6.18 and later.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
This commit is contained in:
Ben Pfaff
2011-01-28 14:00:51 -08:00
parent f0fef76062
commit 37a1300c3c
5 changed files with 370 additions and 571 deletions

View File

@@ -12,7 +12,6 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/fs.h>
#include <linux/if_arp.h> #include <linux/if_arp.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/in.h> #include <linux/in.h>
@@ -44,7 +43,6 @@
#include <linux/dmi.h> #include <linux/dmi.h>
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
#include <net/genetlink.h> #include <net/genetlink.h>
#include <linux/compat.h>
#include "openvswitch/datapath-protocol.h" #include "openvswitch/datapath-protocol.h"
#include "checksum.h" #include "checksum.h"
@@ -557,12 +555,12 @@ static int flush_flows(int dp_idx)
return 0; return 0;
} }
static int validate_actions(const struct nlattr *actions, u32 actions_len) static int validate_actions(const struct nlattr *attr)
{ {
const struct nlattr *a; const struct nlattr *a;
int rem; int rem;
nla_for_each_attr(a, actions, actions_len, rem) { nla_for_each_nested(a, attr, rem) {
static const u32 action_lens[ODPAT_MAX + 1] = { static const u32 action_lens[ODPAT_MAX + 1] = {
[ODPAT_OUTPUT] = 4, [ODPAT_OUTPUT] = 4,
[ODPAT_CONTROLLER] = 8, [ODPAT_CONTROLLER] = 8,
@@ -629,28 +627,6 @@ static int validate_actions(const struct nlattr *actions, u32 actions_len)
return 0; return 0;
} }
/* Parsed form of a flow request, filled in by copy_flow_from_user() from the
 * struct odp_flow header and the Netlink attributes that follow it. */
struct dp_flowcmd {
/* Copied from the request's struct odp_flow header. */
u32 nlmsg_flags;
/* Datapath index from the header; looked up with get_dp(). */
u32 dp_idx;
/* From the header; copy_flow_to_user() fails with -EMSGSIZE if the reply
 * would be longer than this. */
u32 total_len;
/* Parsed from ODP_FLOW_ATTR_KEY, or all-zeros if that attribute is absent. */
struct sw_flow_key key;
/* Payload and length of ODP_FLOW_ATTR_ACTIONS, or NULL and 0 if absent.
 * 'actions' points into the request skb returned by copy_flow_from_user(). */
const struct nlattr *actions;
u32 actions_len;
/* True if ODP_FLOW_ATTR_CLEAR was present. */
bool clear;
/* Value of ODP_FLOW_ATTR_STATE, or 0 if absent.  dump_flow() uses the high
 * 32 bits as the bucket and the low 32 bits as the object position. */
u64 state;
};
/* Builds a sw_flow_actions holding a copy of the actions in 'flowcmd'.
 *
 * Returns whatever flow_actions_alloc() returned for 'flowcmd->actions_len',
 * which callers test with IS_ERR().  Nothing is copied when the allocation
 * failed or the action list is empty. */
static struct sw_flow_actions *get_actions(const struct dp_flowcmd *flowcmd)
{
	struct sw_flow_actions *sfa = flow_actions_alloc(flowcmd->actions_len);

	if (IS_ERR(sfa) || !flowcmd->actions_len)
		return sfa;

	memcpy(sfa->actions, flowcmd->actions, flowcmd->actions_len);
	return sfa;
}
static void clear_stats(struct sw_flow *flow) static void clear_stats(struct sw_flow *flow)
{ {
flow->used = 0; flow->used = 0;
@@ -680,8 +656,6 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct odp_header *odp_header = info->userhdr; struct odp_header *odp_header = info->userhdr;
struct nlattr **a = info->attrs; struct nlattr **a = info->attrs;
struct sk_buff *packet; struct sk_buff *packet;
unsigned int actions_len;
struct nlattr *actions;
struct sw_flow_key key; struct sw_flow_key key;
struct datapath *dp; struct datapath *dp;
struct ethhdr *eth; struct ethhdr *eth;
@@ -693,9 +667,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN) nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
goto exit; goto exit;
actions = nla_data(a[ODP_PACKET_ATTR_ACTIONS]); err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
actions_len = nla_len(a[ODP_PACKET_ATTR_ACTIONS]);
err = validate_actions(actions, actions_len);
if (err) if (err)
goto exit; goto exit;
@@ -725,7 +697,9 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
dp = get_dp(odp_header->dp_idx); dp = get_dp(odp_header->dp_idx);
err = -ENODEV; err = -ENODEV;
if (dp) if (dp)
err = execute_actions(dp, packet, &key, actions, actions_len); err = execute_actions(dp, packet, &key,
nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
rcu_read_unlock(); rcu_read_unlock();
exit: exit:
@@ -817,46 +791,49 @@ static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
[ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, [ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
[ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
[ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
[ODP_FLOW_ATTR_STATE] = { .type = NLA_U64 },
}; };
/* Generic Netlink family for the flow commands.  It is listed in
 * dp_genl_families[] together with dp_flow_genl_ops. */
static struct genl_family dp_flow_genl_family = {
.id = GENL_ID_GENERATE,
/* Family-specific header; the command handlers read it via info->userhdr. */
.hdrsize = sizeof(struct odp_header),
.name = ODP_FLOW_FAMILY,
.version = 1,
.maxattr = ODP_FLOW_ATTR_MAX
};
static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp, static struct genl_multicast_group dp_flow_multicast_group = {
struct sw_flow *flow, u32 total_len, u64 state) .name = ODP_FLOW_MCGROUP
};
/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{ {
const int skb_orig_len = skb->len;
const struct sw_flow_actions *sf_acts; const struct sw_flow_actions *sf_acts;
struct odp_flow_stats stats; struct odp_flow_stats stats;
struct odp_flow *odp_flow; struct odp_header *odp_header;
struct sk_buff *skb;
struct nlattr *nla; struct nlattr *nla;
unsigned long used; unsigned long used;
u8 tcp_flags; u8 tcp_flags;
int nla_len;
int err; int err;
sf_acts = rcu_dereference_protected(flow->sf_acts, sf_acts = rcu_dereference_protected(flow->sf_acts,
lockdep_genl_is_held()); lockdep_genl_is_held());
skb = alloc_skb(128 + FLOW_BUFSIZE + sf_acts->actions_len, GFP_KERNEL); odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
err = -ENOMEM; if (!odp_header)
if (!skb) return -EMSGSIZE;
goto exit;
odp_flow = (struct odp_flow*)__skb_put(skb, sizeof(struct odp_flow)); odp_header->dp_idx = dp->dp_idx;
odp_flow->dp_idx = dp->dp_idx;
odp_flow->total_len = total_len;
nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY); nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
if (!nla) if (!nla)
goto nla_put_failure; goto nla_put_failure;
err = flow_to_nlattrs(&flow->key, skb); err = flow_to_nlattrs(&flow->key, skb);
if (err) if (err)
goto exit_free; goto error;
nla_nest_end(skb, nla);
nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
if (!nla || skb_tailroom(skb) < sf_acts->actions_len)
goto nla_put_failure;
memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
nla_nest_end(skb, nla); nla_nest_end(skb, nla);
spin_lock_bh(&flow->lock); spin_lock_bh(&flow->lock);
@@ -875,130 +852,116 @@ static int copy_flow_to_user(struct odp_flow __user *dst, struct datapath *dp,
if (tcp_flags) if (tcp_flags)
NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags); NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);
if (state) /* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
NLA_PUT_U64(skb, ODP_FLOW_ATTR_STATE, state); * be dumped into 'skb', then expand the skb. This is unusual for
* Netlink but individual action lists can be longer than a page and
* thus entirely undumpable if we didn't do this. */
nla_len = nla_total_size(sf_acts->actions_len);
if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
int hdr_off = (unsigned char *)odp_header - skb->data;
if (skb->len > total_len) err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
goto nla_put_failure; if (err)
goto error;
odp_flow->len = skb->len; odp_header = (struct odp_header *)(skb->data + hdr_off);
err = copy_to_user(dst, skb->data, skb->len) ? -EFAULT : 0; }
goto exit_free; nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
nla_nest_end(skb, nla);
return genlmsg_end(skb, odp_header);
nla_put_failure: nla_put_failure:
err = -EMSGSIZE; err = -EMSGSIZE;
exit_free: error:
kfree_skb(skb); genlmsg_cancel(skb, odp_header);
exit:
return err; return err;
} }
/* Called with genl_mutex. */ static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
static struct sk_buff *copy_flow_from_user(struct odp_flow __user *uodp_flow, {
struct dp_flowcmd *flowcmd) const struct sw_flow_actions *sf_acts;
int len;
sf_acts = rcu_dereference_protected(flow->sf_acts,
lockdep_genl_is_held());
len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}
static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
u32 pid, u32 seq, u8 cmd)
{ {
struct nlattr *a[ODP_FLOW_ATTR_MAX + 1];
struct odp_flow *odp_flow;
struct sk_buff *skb; struct sk_buff *skb;
u32 len; int retval;
int err;
if (get_user(len, &uodp_flow->len)) skb = odp_flow_cmd_alloc_info(flow);
return ERR_PTR(-EFAULT);
if (len < sizeof(struct odp_flow))
return ERR_PTR(-EINVAL);
skb = alloc_skb(len, GFP_KERNEL);
if (!skb) if (!skb)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
err = -EFAULT; retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
if (copy_from_user(__skb_put(skb, len), uodp_flow, len)) BUG_ON(retval < 0);
goto error_free_skb;
odp_flow = (struct odp_flow *)skb->data;
err = -EINVAL;
if (odp_flow->len != len)
goto error_free_skb;
flowcmd->nlmsg_flags = odp_flow->nlmsg_flags;
flowcmd->dp_idx = odp_flow->dp_idx;
flowcmd->total_len = odp_flow->total_len;
err = nla_parse(a, ODP_FLOW_ATTR_MAX,
(struct nlattr *)(skb->data + sizeof(struct odp_flow)),
skb->len - sizeof(struct odp_flow), flow_policy);
if (err)
goto error_free_skb;
/* ODP_FLOW_ATTR_KEY. */
if (a[ODP_FLOW_ATTR_KEY]) {
err = flow_from_nlattrs(&flowcmd->key, a[ODP_FLOW_ATTR_KEY]);
if (err)
goto error_free_skb;
} else
memset(&flowcmd->key, 0, sizeof(struct sw_flow_key));
/* ODP_FLOW_ATTR_ACTIONS. */
if (a[ODP_FLOW_ATTR_ACTIONS]) {
flowcmd->actions = nla_data(a[ODP_FLOW_ATTR_ACTIONS]);
flowcmd->actions_len = nla_len(a[ODP_FLOW_ATTR_ACTIONS]);
err = validate_actions(flowcmd->actions, flowcmd->actions_len);
if (err)
goto error_free_skb;
} else {
flowcmd->actions = NULL;
flowcmd->actions_len = 0;
}
flowcmd->clear = a[ODP_FLOW_ATTR_CLEAR] != NULL;
flowcmd->state = a[ODP_FLOW_ATTR_STATE] ? nla_get_u64(a[ODP_FLOW_ATTR_STATE]) : 0;
return skb; return skb;
error_free_skb:
kfree_skb(skb);
return ERR_PTR(err);
} }
static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs;
struct odp_header *odp_header = info->userhdr;
struct tbl_node *flow_node; struct tbl_node *flow_node;
struct dp_flowcmd flowcmd; struct sw_flow_key key;
struct sw_flow *flow; struct sw_flow *flow;
struct sk_buff *skb; struct sk_buff *reply;
struct datapath *dp; struct datapath *dp;
struct tbl *table; struct tbl *table;
u32 hash; u32 hash;
int error; int error;
skb = copy_flow_from_user(uodp_flow, &flowcmd); /* Extract key. */
error = PTR_ERR(skb); error = -EINVAL;
if (IS_ERR(skb)) if (!a[ODP_FLOW_ATTR_KEY])
goto exit; goto error;
error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
if (error)
goto error;
dp = get_dp(flowcmd.dp_idx); /* Validate actions. */
if (a[ODP_FLOW_ATTR_ACTIONS]) {
error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
if (error)
goto error;
} else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
error = -EINVAL;
goto error;
}
dp = get_dp(odp_header->dp_idx);
error = -ENODEV; error = -ENODEV;
if (!dp) if (!dp)
goto exit; goto error;
hash = flow_hash(&flowcmd.key); hash = flow_hash(&key);
table = get_table_protected(dp); table = get_table_protected(dp);
flow_node = tbl_lookup(table, &flowcmd.key, hash, flow_cmp); flow_node = tbl_lookup(table, &key, hash, flow_cmp);
if (!flow_node) { if (!flow_node) {
struct sw_flow_actions *acts; struct sw_flow_actions *acts;
/* Bail out if we're not allowed to create a new flow. */ /* Bail out if we're not allowed to create a new flow. */
error = -ENOENT; error = -ENOENT;
if (cmd == ODP_FLOW_SET) if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
goto exit; goto error;
/* Expand table, if necessary, to make room. */ /* Expand table, if necessary, to make room. */
if (tbl_count(table) >= tbl_n_buckets(table)) { if (tbl_count(table) >= tbl_n_buckets(table)) {
error = expand_table(dp); error = expand_table(dp);
if (error) if (error)
goto exit; goto error;
table = get_table_protected(dp); table = get_table_protected(dp);
} }
@@ -1006,26 +969,25 @@ static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
flow = flow_alloc(); flow = flow_alloc();
if (IS_ERR(flow)) { if (IS_ERR(flow)) {
error = PTR_ERR(flow); error = PTR_ERR(flow);
goto exit; goto error;
} }
flow->key = flowcmd.key; flow->key = key;
clear_stats(flow); clear_stats(flow);
/* Obtain actions. */ /* Obtain actions. */
acts = get_actions(&flowcmd); acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
error = PTR_ERR(acts); error = PTR_ERR(acts);
if (IS_ERR(acts)) if (IS_ERR(acts))
goto error_free_flow; goto error_free_flow;
rcu_assign_pointer(flow->sf_acts, acts); rcu_assign_pointer(flow->sf_acts, acts);
error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0);
if (error)
goto error_free_flow;
/* Put flow in bucket. */ /* Put flow in bucket. */
error = tbl_insert(table, &flow->tbl_node, hash); error = tbl_insert(table, &flow->tbl_node, hash);
if (error) if (error)
goto error_free_flow; goto error_free_flow;
reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
info->snd_seq, ODP_FLOW_CMD_NEW);
} else { } else {
/* We found a matching flow. */ /* We found a matching flow. */
struct sw_flow_actions *old_acts; struct sw_flow_actions *old_acts;
@@ -1037,124 +999,194 @@ static int new_flow(unsigned int cmd, struct odp_flow __user *uodp_flow)
* gets fixed. * gets fixed.
*/ */
error = -EEXIST; error = -EEXIST;
if (flowcmd.nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
goto error_kfree_skb; info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
goto error;
/* Update actions. */ /* Update actions. */
flow = flow_cast(flow_node); flow = flow_cast(flow_node);
old_acts = rcu_dereference_protected(flow->sf_acts, old_acts = rcu_dereference_protected(flow->sf_acts,
lockdep_genl_is_held()); lockdep_genl_is_held());
if (flowcmd.actions && if (a[ODP_FLOW_ATTR_ACTIONS] &&
(old_acts->actions_len != flowcmd.actions_len || (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
memcmp(old_acts->actions, flowcmd.actions, memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
flowcmd.actions_len))) { old_acts->actions_len))) {
struct sw_flow_actions *new_acts; struct sw_flow_actions *new_acts;
new_acts = get_actions(&flowcmd); new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
error = PTR_ERR(new_acts); error = PTR_ERR(new_acts);
if (IS_ERR(new_acts)) if (IS_ERR(new_acts))
goto error_kfree_skb; goto error;
rcu_assign_pointer(flow->sf_acts, new_acts); rcu_assign_pointer(flow->sf_acts, new_acts);
flow_deferred_free_acts(old_acts); flow_deferred_free_acts(old_acts);
} }
error = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0); reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
if (error) info->snd_seq, ODP_FLOW_CMD_NEW);
goto error_kfree_skb;
/* Clear stats. */ /* Clear stats. */
if (flowcmd.clear) { if (a[ODP_FLOW_ATTR_CLEAR]) {
spin_lock_bh(&flow->lock); spin_lock_bh(&flow->lock);
clear_stats(flow); clear_stats(flow);
spin_unlock_bh(&flow->lock); spin_unlock_bh(&flow->lock);
} }
} }
kfree_skb(skb);
if (!IS_ERR(reply))
genl_notify(reply, genl_info_net(info), info->snd_pid,
dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
else
netlink_set_err(INIT_NET_GENL_SOCK, 0,
dp_flow_multicast_group.id, PTR_ERR(reply));
return 0; return 0;
error_free_flow: error_free_flow:
flow_put(flow); flow_put(flow);
error_kfree_skb: error:
kfree_skb(skb);
exit:
return error; return error;
} }
static int get_or_del_flow(unsigned int cmd, struct odp_flow __user *uodp_flow) static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs;
struct odp_header *odp_header = info->userhdr;
struct sw_flow_key key;
struct tbl_node *flow_node; struct tbl_node *flow_node;
struct dp_flowcmd flowcmd; struct sk_buff *reply;
struct sw_flow *flow; struct sw_flow *flow;
struct sk_buff *skb;
struct datapath *dp; struct datapath *dp;
struct tbl *table; struct tbl *table;
int err; int err;
skb = copy_flow_from_user(uodp_flow, &flowcmd); if (!a[ODP_FLOW_ATTR_KEY])
if (IS_ERR(skb)) return -EINVAL;
return PTR_ERR(skb); err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
if (err)
return err;
dp = get_dp(flowcmd.dp_idx); dp = get_dp(odp_header->dp_idx);
if (!dp) if (!dp)
return -ENODEV; return -ENODEV;
table = get_table_protected(dp); table = get_table_protected(dp);
flow_node = tbl_lookup(table, &flowcmd.key, flow_hash(&flowcmd.key), flow_cmp); flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
if (!flow_node) if (!flow_node)
return -ENOENT; return -ENOENT;
if (cmd == ODP_FLOW_DEL) {
err = tbl_remove(table, flow_node);
if (err)
return err;
}
flow = flow_cast(flow_node); flow = flow_cast(flow_node);
err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len, 0); reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
if (!err && cmd == ODP_FLOW_DEL) if (IS_ERR(reply))
flow_deferred_free(flow); return PTR_ERR(reply);
return err; return genlmsg_reply(reply, info);
} }
static int dump_flow(struct odp_flow __user *uodp_flow) static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{ {
struct nlattr **a = info->attrs;
struct odp_header *odp_header = info->userhdr;
struct sw_flow_key key;
struct tbl_node *flow_node; struct tbl_node *flow_node;
struct dp_flowcmd flowcmd; struct sk_buff *reply;
struct sw_flow *flow; struct sw_flow *flow;
struct sk_buff *skb;
struct datapath *dp; struct datapath *dp;
u32 bucket, obj; struct tbl *table;
int err; int err;
skb = copy_flow_from_user(uodp_flow, &flowcmd); if (!a[ODP_FLOW_ATTR_KEY])
err = PTR_ERR(skb); return flush_flows(odp_header->dp_idx);
if (IS_ERR(skb)) err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
goto exit; if (err)
return err;
dp = get_dp(flowcmd.dp_idx); dp = get_dp(odp_header->dp_idx);
err = -ENODEV;
if (!dp) if (!dp)
goto exit_kfree_skb; return -ENODEV;
bucket = flowcmd.state >> 32; table = get_table_protected(dp);
obj = flowcmd.state; flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
err = -ENODEV;
if (!flow_node) if (!flow_node)
goto exit_kfree_skb; return -ENOENT;
flow = flow_cast(flow_node); flow = flow_cast(flow_node);
err = copy_flow_to_user(uodp_flow, dp, flow, flowcmd.total_len,
((u64)bucket << 32) | obj);
exit_kfree_skb: reply = odp_flow_cmd_alloc_info(flow);
kfree_skb(skb); if (!reply)
exit: return -ENOMEM;
return err;
err = tbl_remove(table, flow_node);
if (err) {
kfree_skb(reply);
return err;
}
err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
info->snd_seq, 0, ODP_FLOW_CMD_DEL);
BUG_ON(err < 0);
flow_deferred_free(flow);
genl_notify(reply, genl_info_net(info), info->snd_pid,
dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
return 0;
} }
/* Dump callback for ODP_FLOW_CMD_GET.
 *
 * Appends flows from the datapath named in the request's odp_header to 'skb',
 * resuming from the table position saved in cb->args[0] (bucket) and
 * cb->args[1] (object).  The saved position is advanced only after a flow has
 * been written successfully, so it still refers to a flow that did not fit.
 *
 * Returns skb->len, or -ENODEV if the datapath does not exist. */
static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp = get_dp(odp_header->dp_idx);

	if (!dp)
		return -ENODEV;

	for (;;) {
		u32 bucket = cb->args[0];
		u32 obj = cb->args[1];
		struct tbl_node *node;
		int rc;

		node = tbl_next(get_table_protected(dp), &bucket, &obj);
		if (!node)
			break;

		rc = odp_flow_cmd_fill_info(flow_cast(node), dp, skb,
					    NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq, NLM_F_MULTI,
					    ODP_FLOW_CMD_NEW);
		if (rc < 0)
			break;

		/* Commit the cursor only once the flow is in 'skb'. */
		cb->args[0] = bucket;
		cb->args[1] = obj;
	}

	return skb->len;
}
/* Operations of the flow Generic Netlink family.  Every command uses
 * flow_policy for its attributes. */
static struct genl_ops dp_flow_genl_ops[] = {
/* NEW and SET share one handler, which checks info->genlhdr->cmd. */
{ .cmd = ODP_FLOW_CMD_NEW,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = odp_flow_cmd_new_or_set
},
{ .cmd = ODP_FLOW_CMD_DEL,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = odp_flow_cmd_del
},
/* GET has both a single-flow handler and a dump handler. */
{ .cmd = ODP_FLOW_CMD_GET,
.flags = 0, /* OK for unprivileged users. */
.policy = flow_policy,
.doit = odp_flow_cmd_get,
.dumpit = odp_flow_cmd_dump
},
{ .cmd = ODP_FLOW_CMD_SET,
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
.policy = flow_policy,
.doit = odp_flow_cmd_new_or_set,
},
};
static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = { static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING #ifdef HAVE_NLA_NUL_STRING
[ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
@@ -1925,72 +1957,6 @@ static struct genl_ops dp_vport_genl_ops[] = {
}, },
}; };
static long openvswitch_ioctl(struct file *f, unsigned int cmd,
unsigned long argp)
{
int err;
genl_lock();
switch (cmd) {
case ODP_FLOW_FLUSH:
err = flush_flows(argp);
goto exit;
case ODP_FLOW_NEW:
case ODP_FLOW_SET:
err = new_flow(cmd, (struct odp_flow __user *)argp);
goto exit;
case ODP_FLOW_GET:
case ODP_FLOW_DEL:
err = get_or_del_flow(cmd, (struct odp_flow __user *)argp);
goto exit;
case ODP_FLOW_DUMP:
err = dump_flow((struct odp_flow __user *)argp);
goto exit;
default:
err = -ENOIOCTLCMD;
break;
}
exit:
genl_unlock();
return err;
}
#ifdef CONFIG_COMPAT
/* Compat ioctl entry point.
 *
 * Translates 'argp' as the command requires and then forwards to
 * openvswitch_ioctl().  Returns -ENOIOCTLCMD for commands it does not know. */
static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
{
	unsigned long arg;

	switch (cmd) {
	case ODP_FLOW_FLUSH:
		/* Ioctls that don't need any translation at all. */
		arg = argp;
		break;

	case ODP_FLOW_NEW:
	case ODP_FLOW_DEL:
	case ODP_FLOW_GET:
	case ODP_FLOW_SET:
	case ODP_FLOW_DUMP:
		/* Ioctls that just need their pointer argument extended. */
		arg = (unsigned long)compat_ptr(argp);
		break;

	default:
		return -ENOIOCTLCMD;
	}

	return openvswitch_ioctl(f, cmd, arg);
}
#endif
/* File operations for the "openvswitch" character device that dp_init()
 * registers with register_chrdev().  Only ioctl handlers are provided. */
static struct file_operations openvswitch_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = openvswitch_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = openvswitch_compat_ioctl,
#endif
};
static int major;
struct genl_family_and_ops { struct genl_family_and_ops {
struct genl_family *family; struct genl_family *family;
struct genl_ops *ops; struct genl_ops *ops;
@@ -2005,6 +1971,9 @@ static const struct genl_family_and_ops dp_genl_families[] = {
{ &dp_vport_genl_family, { &dp_vport_genl_family,
dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
&dp_vport_multicast_group }, &dp_vport_multicast_group },
{ &dp_flow_genl_family,
dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
&dp_flow_multicast_group },
{ &dp_packet_genl_family, { &dp_packet_genl_family,
dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
NULL }, NULL },
@@ -2073,18 +2042,12 @@ static int __init dp_init(void)
if (err) if (err)
goto error_vport_exit; goto error_vport_exit;
major = register_chrdev(0, "openvswitch", &openvswitch_fops); err = dp_register_genl();
if (err < 0) if (err < 0)
goto error_unreg_notifier; goto error_unreg_notifier;
err = dp_register_genl();
if (err < 0)
goto error_unreg_chrdev;
return 0; return 0;
error_unreg_chrdev:
unregister_chrdev(major, "openvswitch");
error_unreg_notifier: error_unreg_notifier:
unregister_netdevice_notifier(&dp_device_notifier); unregister_netdevice_notifier(&dp_device_notifier);
error_vport_exit: error_vport_exit:
@@ -2099,7 +2062,6 @@ static void dp_cleanup(void)
{ {
rcu_barrier(); rcu_barrier();
dp_unregister_genl(ARRAY_SIZE(dp_genl_families)); dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
unregister_chrdev(major, "openvswitch");
unregister_netdevice_notifier(&dp_device_notifier); unregister_netdevice_notifier(&dp_device_notifier);
vport_exit(); vport_exit();
flow_exit(); flow_exit();

View File

@@ -103,13 +103,11 @@ void flow_used(struct sw_flow *flow, struct sk_buff *skb)
spin_unlock_bh(&flow->lock); spin_unlock_bh(&flow->lock);
} }
struct sw_flow_actions *flow_actions_alloc(u32 actions_len) struct sw_flow_actions *flow_actions_alloc(const struct nlattr *actions)
{ {
int actions_len = nla_len(actions);
struct sw_flow_actions *sfa; struct sw_flow_actions *sfa;
if (actions_len % NLA_ALIGNTO)
return ERR_PTR(-EINVAL);
/* At least DP_MAX_PORTS actions are required to be able to flood a /* At least DP_MAX_PORTS actions are required to be able to flood a
* packet to every port. Factor of 2 allows for setting VLAN tags, * packet to every port. Factor of 2 allows for setting VLAN tags,
* etc. */ * etc. */
@@ -121,6 +119,7 @@ struct sw_flow_actions *flow_actions_alloc(u32 actions_len)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
sfa->actions_len = actions_len; sfa->actions_len = actions_len;
memcpy(sfa->actions, nla_data(actions), actions_len);
return sfa; return sfa;
} }

View File

@@ -83,7 +83,7 @@ struct sw_flow *flow_alloc(void);
void flow_deferred_free(struct sw_flow *); void flow_deferred_free(struct sw_flow *);
void flow_free_tbl(struct tbl_node *); void flow_free_tbl(struct tbl_node *);
struct sw_flow_actions *flow_actions_alloc(u32 actions_len); struct sw_flow_actions *flow_actions_alloc(const struct nlattr *);
void flow_deferred_free_acts(struct sw_flow_actions *); void flow_deferred_free_acts(struct sw_flow_actions *);
void flow_hold(struct sw_flow *); void flow_hold(struct sw_flow *);

View File

@@ -69,13 +69,6 @@
#include <linux/if_link.h> #include <linux/if_link.h>
#include <linux/netlink.h> #include <linux/netlink.h>
/* Flow ioctl commands.  All except ODP_FLOW_FLUSH are encoded as taking a
 * struct odp_flow; ODP_FLOW_FLUSH is encoded with no argument structure. */
#define ODP_FLOW_NEW _IOWR('O', 13, struct odp_flow)
#define ODP_FLOW_DEL _IOWR('O', 14, struct odp_flow)
#define ODP_FLOW_GET _IOWR('O', 15, struct odp_flow)
#define ODP_FLOW_SET _IOWR('O', 16, struct odp_flow)
#define ODP_FLOW_DUMP _IOWR('O', 17, struct odp_flow)
#define ODP_FLOW_FLUSH _IO('O', 19)
/* Datapaths. */ /* Datapaths. */
@@ -288,6 +281,19 @@ enum {
}; };
#define ODP_PATCH_ATTR_MAX (__ODP_PATCH_ATTR_MAX - 1) #define ODP_PATCH_ATTR_MAX (__ODP_PATCH_ATTR_MAX - 1)
/* Flows. */
/* Name of the Generic Netlink family that carries the flow commands. */
#define ODP_FLOW_FAMILY "odp_flow"
/* Name of the multicast group on which flow notifications are sent. */
#define ODP_FLOW_MCGROUP "odp_flow"
/* Commands within the ODP_FLOW_FAMILY Generic Netlink family. */
enum odp_flow_cmd {
ODP_FLOW_CMD_UNSPEC,
/* Create a flow; also the command used in replies to GET, dumps and SET. */
ODP_FLOW_CMD_NEW,
/* Delete one flow, or flush all flows if no ODP_FLOW_ATTR_KEY is given. */
ODP_FLOW_CMD_DEL,
/* Look up one flow, or dump all flows. */
ODP_FLOW_CMD_GET,
/* Modify an existing flow; fails with ENOENT if it does not exist. */
ODP_FLOW_CMD_SET
};
struct odp_flow_stats { struct odp_flow_stats {
uint64_t n_packets; /* Number of matched packets. */ uint64_t n_packets; /* Number of matched packets. */
@@ -350,23 +356,32 @@ struct odp_key_arp {
}; };
/** /**
* struct odp_flow - header with basic information about a flow. * enum odp_flow_attr - attributes for %ODP_FLOW_* commands.
* @dp_idx: Datapath index. * @ODP_FLOW_ATTR_KEY: Nested %ODP_KEY_ATTR_* attributes specifying the flow
* @len: Length of this structure plus the Netlink attributes following it. * key. Always present in notifications. Required for all requests (except
* @total_len: Total space available for kernel reply to request. * dumps).
* @ODP_FLOW_ATTR_ACTIONS: Nested %ODPAT_* attributes specifying the actions to
* take for packets that match the key. Always present in notifications.
* Required for %ODP_FLOW_CMD_NEW requests, optional on %ODP_FLOW_CMD_SET
* request to change the existing actions, ignored for other requests.
* @ODP_FLOW_ATTR_STATS: &struct odp_flow_stats giving statistics for this
* flow. Present in notifications if the stats would be nonzero. Ignored in
* requests.
* @ODP_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
* TCP flags seen on packets in this flow. Only present in notifications for
* TCP flows, and only if it would be nonzero. Ignored in requests.
* @ODP_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
* the system monotonic clock, at which a packet was last processed for this
* flow. Only present in notifications if a packet has been processed for this
* flow. Ignored in requests.
* @ODP_FLOW_ATTR_CLEAR: If present in a %ODP_FLOW_CMD_SET request, clears the
* last-used time, accumulated TCP flags, and statistics for this flow.
* Otherwise ignored in requests. Never present in notifications.
* *
* Followed by &struct nlattr attributes, whose types are drawn from * These attributes follow the &struct odp_header within the Generic Netlink
* %ODP_FLOW_ATTR_*, up to a length of @len bytes including the &struct * payload for %ODP_FLOW_* commands.
* odp_flow header.
*/ */
struct odp_flow { enum odp_flow_attr {
uint32_t nlmsg_flags;
uint32_t dp_idx;
uint32_t len;
uint32_t total_len;
};
enum odp_flow_type {
ODP_FLOW_ATTR_UNSPEC, ODP_FLOW_ATTR_UNSPEC,
ODP_FLOW_ATTR_KEY, /* Sequence of ODP_KEY_ATTR_* attributes. */ ODP_FLOW_ATTR_KEY, /* Sequence of ODP_KEY_ATTR_* attributes. */
ODP_FLOW_ATTR_ACTIONS, /* Sequence of nested ODPAT_* attributes. */ ODP_FLOW_ATTR_ACTIONS, /* Sequence of nested ODPAT_* attributes. */
@@ -374,7 +389,6 @@ enum odp_flow_type {
ODP_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ ODP_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
ODP_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ ODP_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
ODP_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ ODP_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
ODP_FLOW_ATTR_STATE, /* u64 state for ODP_FLOW_DUMP. */
__ODP_FLOW_ATTR_MAX __ODP_FLOW_ATTR_MAX
}; };

View File

@@ -25,12 +25,10 @@
#include <inttypes.h> #include <inttypes.h>
#include <net/if.h> #include <net/if.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/ethtool.h>
#include <linux/pkt_sched.h> #include <linux/pkt_sched.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/sockios.h> #include <linux/sockios.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
@@ -80,18 +78,18 @@ static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply,
struct ofpbuf **bufp); struct ofpbuf **bufp);
struct dpif_linux_flow { struct dpif_linux_flow {
/* ioctl command argument. */ /* Generic Netlink header. */
int cmd; uint8_t cmd;
/* struct odp_flow header. */ /* struct odp_header. */
unsigned int nlmsg_flags; unsigned int nlmsg_flags;
uint32_t dp_idx; uint32_t dp_idx;
/* Attributes. /* Attributes.
* *
* The 'stats', 'used', and 'state' members point to 64-bit data that might * The 'stats' and 'used' members point to 64-bit data that might only be
* only be aligned on 32-bit boundaries, so get_unaligned_u64() should be * aligned on 32-bit boundaries, so get_unaligned_u64() should be used to
* used to access their values. */ * access their values. */
const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */ const struct nlattr *key; /* ODP_FLOW_ATTR_KEY. */
size_t key_len; size_t key_len;
const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */ const struct nlattr *actions; /* ODP_FLOW_ATTR_ACTIONS. */
@@ -100,10 +98,13 @@ struct dpif_linux_flow {
const uint8_t *tcp_flags; /* ODP_FLOW_ATTR_TCP_FLAGS. */ const uint8_t *tcp_flags; /* ODP_FLOW_ATTR_TCP_FLAGS. */
const uint64_t *used; /* ODP_FLOW_ATTR_USED. */ const uint64_t *used; /* ODP_FLOW_ATTR_USED. */
bool clear; /* ODP_FLOW_ATTR_CLEAR. */ bool clear; /* ODP_FLOW_ATTR_CLEAR. */
const uint64_t *state; /* ODP_FLOW_ATTR_STATE. */
}; };
static void dpif_linux_flow_init(struct dpif_linux_flow *); static void dpif_linux_flow_init(struct dpif_linux_flow *);
static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *,
const struct ofpbuf *);
static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *,
struct ofpbuf *);
static int dpif_linux_flow_transact(const struct dpif_linux_flow *request, static int dpif_linux_flow_transact(const struct dpif_linux_flow *request,
struct dpif_linux_flow *reply, struct dpif_linux_flow *reply,
struct ofpbuf **bufp); struct ofpbuf **bufp);
@@ -113,7 +114,6 @@ static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *,
/* Datapath interface for the openvswitch Linux kernel module. */ /* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_linux { struct dpif_linux {
struct dpif dpif; struct dpif dpif;
int fd;
/* Multicast group messages. */ /* Multicast group messages. */
struct nl_sock *mc_sock; struct nl_sock *mc_sock;
@@ -136,6 +136,7 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5);
/* Generic Netlink family numbers for ODP. */ /* Generic Netlink family numbers for ODP. */
static int odp_datapath_family; static int odp_datapath_family;
static int odp_vport_family; static int odp_vport_family;
static int odp_flow_family;
static int odp_packet_family; static int odp_packet_family;
/* Generic Netlink socket. */ /* Generic Netlink socket. */
@@ -145,9 +146,6 @@ static int dpif_linux_init(void);
static int open_dpif(const struct dpif_linux_dp *, static int open_dpif(const struct dpif_linux_dp *,
const struct dpif_linux_vport *local_vport, const struct dpif_linux_vport *local_vport,
struct dpif **); struct dpif **);
static int get_openvswitch_major(void);
static int open_minor(int minor, int *fdp);
static int make_openvswitch_device(int minor, char **fnp);
static void dpif_linux_port_changed(const struct rtnetlink_link_change *, static void dpif_linux_port_changed(const struct rtnetlink_link_change *,
void *dpif); void *dpif);
@@ -168,7 +166,6 @@ dpif_linux_enumerate(struct svec *all_dps)
{ {
struct nl_dump dump; struct nl_dump dump;
struct ofpbuf msg; struct ofpbuf msg;
int major;
int error; int error;
error = dpif_linux_init(); error = dpif_linux_init();
@@ -176,12 +173,6 @@ dpif_linux_enumerate(struct svec *all_dps)
return error; return error;
} }
/* Check that the Open vSwitch module is loaded. */
major = get_openvswitch_major();
if (major < 0) {
return -major;
}
dpif_linux_dp_dump_start(&dump); dpif_linux_dp_dump_start(&dump);
while (nl_dump_next(&dump, &msg)) { while (nl_dump_next(&dump, &msg)) {
struct dpif_linux_dp dp; struct dpif_linux_dp dp;
@@ -252,14 +243,8 @@ open_dpif(const struct dpif_linux_dp *dp,
struct dpif_linux *dpif; struct dpif_linux *dpif;
char *name; char *name;
int error; int error;
int fd;
int i; int i;
error = open_minor(dp_idx, &fd);
if (error) {
goto error;
}
dpif = xmalloc(sizeof *dpif); dpif = xmalloc(sizeof *dpif);
error = rtnetlink_link_notifier_register(&dpif->port_notifier, error = rtnetlink_link_notifier_register(&dpif->port_notifier,
dpif_linux_port_changed, dpif); dpif_linux_port_changed, dpif);
@@ -271,7 +256,6 @@ open_dpif(const struct dpif_linux_dp *dp,
dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx); dpif_init(&dpif->dpif, &dpif_linux_class, name, dp_idx, dp_idx);
free(name); free(name);
dpif->fd = fd;
dpif->mc_sock = NULL; dpif->mc_sock = NULL;
for (i = 0; i < DPIF_N_UC_TYPES; i++) { for (i = 0; i < DPIF_N_UC_TYPES; i++) {
dpif->mcgroups[i] = dp->mcgroups[i]; dpif->mcgroups[i] = dp->mcgroups[i];
@@ -288,8 +272,6 @@ open_dpif(const struct dpif_linux_dp *dp,
error_free: error_free:
free(dpif); free(dpif);
close(fd);
error:
return error; return error;
} }
@@ -300,7 +282,6 @@ dpif_linux_close(struct dpif *dpif_)
rtnetlink_link_notifier_unregister(&dpif->port_notifier); rtnetlink_link_notifier_unregister(&dpif->port_notifier);
shash_destroy(&dpif->changed_ports); shash_destroy(&dpif->changed_ports);
free(dpif->local_ifname); free(dpif->local_ifname);
close(dpif->fd);
free(dpif); free(dpif);
} }
@@ -472,7 +453,12 @@ static int
dpif_linux_flow_flush(struct dpif *dpif_) dpif_linux_flow_flush(struct dpif *dpif_)
{ {
struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux *dpif = dpif_linux_cast(dpif_);
return ioctl(dpif->fd, ODP_FLOW_FLUSH, dpif->minor) ? errno : 0; struct dpif_linux_flow flow;
dpif_linux_flow_init(&flow);
flow.cmd = ODP_FLOW_CMD_DEL;
flow.dp_idx = dpif->minor;
return dpif_linux_flow_transact(&flow, NULL, NULL);
} }
struct dpif_linux_port_state { struct dpif_linux_port_state {
@@ -574,7 +560,7 @@ dpif_linux_flow_get(const struct dpif *dpif_,
int error; int error;
dpif_linux_flow_init(&request); dpif_linux_flow_init(&request);
request.cmd = ODP_FLOW_GET; request.cmd = ODP_FLOW_CMD_GET;
request.dp_idx = dpif->minor; request.dp_idx = dpif->minor;
request.key = key; request.key = key;
request.key_len = key_len; request.key_len = key_len;
@@ -606,7 +592,7 @@ dpif_linux_flow_put(struct dpif *dpif_, enum dpif_flow_put_flags flags,
int error; int error;
dpif_linux_flow_init(&request); dpif_linux_flow_init(&request);
request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_NEW : ODP_FLOW_SET; request.cmd = flags & DPIF_FP_CREATE ? ODP_FLOW_CMD_NEW : ODP_FLOW_CMD_SET;
request.dp_idx = dpif->minor; request.dp_idx = dpif->minor;
request.key = key; request.key = key;
request.key_len = key_len; request.key_len = key_len;
@@ -637,7 +623,7 @@ dpif_linux_flow_del(struct dpif *dpif_,
int error; int error;
dpif_linux_flow_init(&request); dpif_linux_flow_init(&request);
request.cmd = ODP_FLOW_DEL; request.cmd = ODP_FLOW_CMD_DEL;
request.dp_idx = dpif->minor; request.dp_idx = dpif->minor;
request.key = key; request.key = key;
request.key_len = key_len; request.key_len = key_len;
@@ -652,37 +638,48 @@ dpif_linux_flow_del(struct dpif *dpif_,
} }
struct dpif_linux_flow_state { struct dpif_linux_flow_state {
struct nl_dump dump;
struct dpif_linux_flow flow; struct dpif_linux_flow flow;
struct ofpbuf *buf;
struct dpif_flow_stats stats; struct dpif_flow_stats stats;
}; };
static int static int
dpif_linux_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep)
{ {
*statep = xzalloc(sizeof(struct dpif_linux_flow_state)); struct dpif_linux *dpif = dpif_linux_cast(dpif_);
struct dpif_linux_flow_state *state;
struct dpif_linux_flow request;
struct ofpbuf *buf;
*statep = state = xmalloc(sizeof *state);
dpif_linux_flow_init(&request);
request.cmd = ODP_DP_CMD_GET;
request.dp_idx = dpif->minor;
buf = ofpbuf_new(1024);
dpif_linux_flow_to_ofpbuf(&request, buf);
nl_dump_start(&state->dump, genl_sock, buf);
ofpbuf_delete(buf);
return 0; return 0;
} }
static int static int
dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_, dpif_linux_flow_dump_next(const struct dpif *dpif_ OVS_UNUSED, void *state_,
const struct nlattr **key, size_t *key_len, const struct nlattr **key, size_t *key_len,
const struct nlattr **actions, size_t *actions_len, const struct nlattr **actions, size_t *actions_len,
const struct dpif_flow_stats **stats) const struct dpif_flow_stats **stats)
{ {
struct dpif_linux *dpif = dpif_linux_cast(dpif_);
struct dpif_linux_flow_state *state = state_; struct dpif_linux_flow_state *state = state_;
struct ofpbuf *old_buf = state->buf; struct ofpbuf buf;
struct dpif_linux_flow request;
int error; int error;
dpif_linux_flow_init(&request); if (!nl_dump_next(&state->dump, &buf)) {
request.cmd = ODP_FLOW_DUMP; return EOF;
request.dp_idx = dpif->minor; }
request.state = state->flow.state;
error = dpif_linux_flow_transact(&request, &state->flow, &state->buf);
ofpbuf_delete(old_buf);
error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf);
if (!error) { if (!error) {
if (key) { if (key) {
*key = state->flow.key; *key = state->flow.key;
@@ -697,17 +694,16 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
*stats = &state->stats; *stats = &state->stats;
} }
} }
return error == ENODEV ? EOF : error; return error;
} }
static int static int
dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{ {
struct dpif_linux_flow_state *state = state_; struct dpif_linux_flow_state *state = state_;
int error = nl_dump_done(&state->dump);
ofpbuf_delete(state->buf);
free(state); free(state);
return 0; return error;
} }
static int static int
@@ -994,8 +990,6 @@ const struct dpif_class dpif_linux_class = {
dpif_linux_recv_purge, dpif_linux_recv_purge,
}; };
static int get_major(const char *target);
static int static int
dpif_linux_init(void) dpif_linux_init(void)
{ {
@@ -1004,9 +998,17 @@ dpif_linux_init(void)
if (error < 0) { if (error < 0) {
error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY, error = nl_lookup_genl_family(ODP_DATAPATH_FAMILY,
&odp_datapath_family); &odp_datapath_family);
if (error) {
VLOG_ERR("Generic Netlink family '%s' does not exist. "
"The Open vSwitch kernel module is probably not loaded.",
ODP_DATAPATH_FAMILY);
}
if (!error) { if (!error) {
error = nl_lookup_genl_family(ODP_VPORT_FAMILY, &odp_vport_family); error = nl_lookup_genl_family(ODP_VPORT_FAMILY, &odp_vport_family);
} }
if (!error) {
error = nl_lookup_genl_family(ODP_FLOW_FAMILY, &odp_flow_family);
}
if (!error) { if (!error) {
error = nl_lookup_genl_family(ODP_PACKET_FAMILY, error = nl_lookup_genl_family(ODP_PACKET_FAMILY,
&odp_packet_family); &odp_packet_family);
@@ -1037,146 +1039,6 @@ dpif_linux_is_internal_device(const char *name)
return reply.type == ODP_VPORT_TYPE_INTERNAL; return reply.type == ODP_VPORT_TYPE_INTERNAL;
} }
static int
make_openvswitch_device(int minor, char **fnp)
{
const char dirname[] = "/dev/net";
int major;
dev_t dev;
struct stat s;
char fn[128];
*fnp = NULL;
major = get_openvswitch_major();
if (major < 0) {
return -major;
}
dev = makedev(major, minor);
sprintf(fn, "%s/dp%d", dirname, minor);
if (!stat(fn, &s)) {
if (!S_ISCHR(s.st_mode)) {
VLOG_WARN_RL(&error_rl, "%s is not a character device, fixing",
fn);
} else if (s.st_rdev != dev) {
VLOG_WARN_RL(&error_rl,
"%s is device %u:%u but should be %u:%u, fixing",
fn, major(s.st_rdev), minor(s.st_rdev),
major(dev), minor(dev));
} else {
goto success;
}
if (unlink(fn)) {
VLOG_WARN_RL(&error_rl, "%s: unlink failed (%s)",
fn, strerror(errno));
return errno;
}
} else if (errno == ENOENT) {
if (stat(dirname, &s)) {
if (errno == ENOENT) {
if (mkdir(dirname, 0755)) {
VLOG_WARN_RL(&error_rl, "%s: mkdir failed (%s)",
dirname, strerror(errno));
return errno;
}
} else {
VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)",
dirname, strerror(errno));
return errno;
}
}
} else {
VLOG_WARN_RL(&error_rl, "%s: stat failed (%s)", fn, strerror(errno));
return errno;
}
/* The device needs to be created. */
if (mknod(fn, S_IFCHR | 0700, dev)) {
VLOG_WARN_RL(&error_rl,
"%s: creating character device %u:%u failed (%s)",
fn, major(dev), minor(dev), strerror(errno));
return errno;
}
success:
*fnp = xstrdup(fn);
return 0;
}
/* Returns the major device number of the Open vSwitch device, or a negative
 * errno value if it cannot be determined.
 *
 * A successful lookup is cached; a failed lookup is retried on the next
 * call. */
static int
get_openvswitch_major(void)
{
    static int cached_major = -1;

    if (cached_major >= 0) {
        return cached_major;
    }

    cached_major = get_major("openvswitch");
    return cached_major;
}
/* Searches the character-device section of /proc/devices for a device named
 * 'target'.
 *
 * Returns the device's major number if it is listed.  Otherwise logs an error
 * and returns a negative errno value: the negated fopen() error if
 * /proc/devices cannot be opened, or -ENODEV if 'target' is not found. */
static int
get_major(const char *target)
{
    const char fn[] = "/proc/devices";
    char line[128];
    FILE *file;
    int ln;

    file = fopen(fn, "r");
    if (!file) {
        /* Capture errno before logging: the logging call may change it. */
        int error = errno;

        VLOG_ERR("opening %s failed (%s)", fn, strerror(error));
        return -error;
    }

    for (ln = 1; fgets(line, sizeof line, file); ln++) {
        char name[64];
        int major;

        if (!strncmp(line, "Character", 9)
            || line[strspn(line, " \t\r\n")] == '\0') {
            /* Section header or blank line (fgets() keeps the newline, so a
             * blank line is "\n", not ""): nothing to do. */
        } else if (!strncmp(line, "Block", 5)) {
            /* We only want character devices, so skip the rest of the file. */
            break;
        } else if (sscanf(line, "%d %63s", &major, name) == 2) {
            /* Both fields were converted, so 'name' is safe to compare. */
            if (!strcmp(name, target)) {
                fclose(file);
                return major;
            }
        } else {
            VLOG_WARN_ONCE("%s:%d: syntax error", fn, ln);
        }
    }

    fclose(file);

    VLOG_ERR("%s: %s major not found (is the module loaded?)", fn, target);
    return -ENODEV;
}
/* Makes sure the device node for datapath 'minor' exists, then opens it
 * read-only and nonblocking.
 *
 * Stores the result of open() in '*fdp'.  Returns 0 if the open succeeded,
 * otherwise a positive errno value ('*fdp' is not written if the device node
 * could not be set up). */
static int
open_minor(int minor, int *fdp)
{
    char *path;
    int retval;

    retval = make_openvswitch_device(minor, &path);
    if (retval) {
        return retval;
    }

    *fdp = open(path, O_RDONLY | O_NONBLOCK);
    if (*fdp < 0) {
        retval = errno;
        VLOG_WARN("%s: open failed (%s)", path, strerror(retval));
    }

    free(path);
    return retval;
}
static void static void
dpif_linux_port_changed(const struct rtnetlink_link_change *change, dpif_linux_port_changed(const struct rtnetlink_link_change *change,
void *dpif_) void *dpif_)
@@ -1196,24 +1058,6 @@ dpif_linux_port_changed(const struct rtnetlink_link_change *change,
dpif->change_error = true; dpif->change_error = true;
} }
} }
/* Stores in '*dp0_fdp' a file descriptor for datapath minor 0, opening it
 * with open_minor() on first use and reusing the same descriptor afterward.
 *
 * Returns 0 on success.  If the open fails, returns the error from
 * open_minor() and leaves '*dp0_fdp' untouched; the open is retried on the
 * next call. */
static int
get_dp0_fd(int *dp0_fdp)
{
    static int dp0_fd = -1;

    if (dp0_fd < 0) {
        int new_fd;
        int error = open_minor(0, &new_fd);

        if (error) {
            return error;
        }
        dp0_fd = new_fd;
    }

    *dp0_fdp = dp0_fd;
    return 0;
}
/* Parses the contents of 'buf', which contains a "struct odp_header" followed /* Parses the contents of 'buf', which contains a "struct odp_header" followed
* by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a
@@ -1588,8 +1432,8 @@ dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply,
return dpif_linux_dp_transact(&request, reply, bufp); return dpif_linux_dp_transact(&request, reply, bufp);
} }
/* Parses the contents of 'buf', which contains a "struct odp_flow" followed by /* Parses the contents of 'buf', which contains a "struct odp_header" followed
* Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a
* positive errno value. * positive errno value.
* *
* 'flow' will contain pointers into 'buf', so the caller should not free 'buf' * 'flow' will contain pointers into 'buf', so the caller should not free 'buf'
@@ -1608,22 +1452,29 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
[ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true }, [ODP_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true },
[ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true }, [ODP_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true },
/* The kernel never uses ODP_FLOW_ATTR_CLEAR. */ /* The kernel never uses ODP_FLOW_ATTR_CLEAR. */
[ODP_FLOW_ATTR_STATE] = { .type = NL_A_U64, .optional = true },
}; };
struct odp_flow *odp_flow;
struct nlattr *a[ARRAY_SIZE(odp_flow_policy)]; struct nlattr *a[ARRAY_SIZE(odp_flow_policy)];
struct odp_header *odp_header;
struct nlmsghdr *nlmsg;
struct genlmsghdr *genl;
struct ofpbuf b;
dpif_linux_flow_init(flow); dpif_linux_flow_init(flow);
if (!nl_policy_parse(buf, sizeof *odp_flow, odp_flow_policy, ofpbuf_use_const(&b, buf->data, buf->size);
a, ARRAY_SIZE(odp_flow_policy))) { nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
genl = ofpbuf_try_pull(&b, sizeof *genl);
odp_header = ofpbuf_try_pull(&b, sizeof *odp_header);
if (!nlmsg || !genl || !odp_header
|| nlmsg->nlmsg_type != odp_flow_family
|| !nl_policy_parse(&b, 0, odp_flow_policy, a,
ARRAY_SIZE(odp_flow_policy))) {
return EINVAL; return EINVAL;
} }
odp_flow = buf->data;
flow->nlmsg_flags = odp_flow->nlmsg_flags; flow->nlmsg_flags = nlmsg->nlmsg_flags;
flow->dp_idx = odp_flow->dp_idx; flow->dp_idx = odp_header->dp_idx;
flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]); flow->key = nl_attr_get(a[ODP_FLOW_ATTR_KEY]);
flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]); flow->key_len = nl_attr_get_size(a[ODP_FLOW_ATTR_KEY]);
if (a[ODP_FLOW_ATTR_ACTIONS]) { if (a[ODP_FLOW_ATTR_ACTIONS]) {
@@ -1636,21 +1487,22 @@ dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow,
if (a[ODP_FLOW_ATTR_TCP_FLAGS]) { if (a[ODP_FLOW_ATTR_TCP_FLAGS]) {
flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]); flow->tcp_flags = nl_attr_get(a[ODP_FLOW_ATTR_TCP_FLAGS]);
} }
if (a[ODP_FLOW_ATTR_STATE]) {
flow->state = nl_attr_get(a[ODP_FLOW_ATTR_STATE]);
}
return 0; return 0;
} }
/* Appends to 'buf' (which must initially be empty) a "struct odp_flow" /* Appends to 'buf' (which must initially be empty) a "struct odp_header"
* followed by Netlink attributes corresponding to 'flow'. */ * followed by Netlink attributes corresponding to 'flow'. */
static void static void
dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow, dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
struct ofpbuf *buf) struct ofpbuf *buf)
{ {
struct odp_flow *odp_flow; struct odp_header *odp_header;
ofpbuf_reserve(buf, sizeof odp_flow); nl_msg_put_genlmsghdr(buf, 0, odp_flow_family,
NLM_F_REQUEST | flow->nlmsg_flags, flow->cmd, 1);
odp_header = ofpbuf_put_uninit(buf, sizeof *odp_header);
odp_header->dp_idx = flow->dp_idx;
if (flow->key_len) { if (flow->key_len) {
nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len); nl_msg_put_unspec(buf, ODP_FLOW_ATTR_KEY, flow->key, flow->key_len);
@@ -1669,17 +1521,6 @@ dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
if (flow->clear) { if (flow->clear) {
nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR); nl_msg_put_flag(buf, ODP_FLOW_ATTR_CLEAR);
} }
if (flow->state) {
nl_msg_put_u64(buf, ODP_FLOW_ATTR_STATE,
get_unaligned_u64(flow->state));
}
odp_flow = ofpbuf_push_uninit(buf, sizeof *odp_flow);
odp_flow->nlmsg_flags = flow->nlmsg_flags;
odp_flow->dp_idx = flow->dp_idx;
odp_flow->len = buf->size;
odp_flow->total_len = (char *) ofpbuf_end(buf) - (char *) buf->data;
} }
/* Clears 'flow' to "empty" values. */ /* Clears 'flow' to "empty" values. */
@@ -1692,49 +1533,32 @@ dpif_linux_flow_init(struct dpif_linux_flow *flow)
/* Executes 'request' in the kernel datapath. If the command fails, returns a /* Executes 'request' in the kernel datapath. If the command fails, returns a
* positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0
* without doing anything else. If 'reply' and 'bufp' are nonnull, then the * without doing anything else. If 'reply' and 'bufp' are nonnull, then the
* result of the command is expected to be an odp_flow also, which is decoded * result of the command is expected to be a flow also, which is decoded and
* and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the * stored in '*reply' and '*bufp'. The caller must free '*bufp' when the reply
* reply is no longer needed ('reply' will contain pointers into '*bufp'). */ * is no longer needed ('reply' will contain pointers into '*bufp'). */
int int
dpif_linux_flow_transact(const struct dpif_linux_flow *request, dpif_linux_flow_transact(const struct dpif_linux_flow *request,
struct dpif_linux_flow *reply, struct ofpbuf **bufp) struct dpif_linux_flow *reply, struct ofpbuf **bufp)
{ {
struct ofpbuf *buf = NULL; struct ofpbuf *request_buf;
int error; int error;
int fd;
assert((reply != NULL) == (bufp != NULL)); assert((reply != NULL) == (bufp != NULL));
error = get_dp0_fd(&fd); request_buf = ofpbuf_new(1024);
if (error) { dpif_linux_flow_to_ofpbuf(request, request_buf);
goto error; error = nl_sock_transact(genl_sock, request_buf, bufp);
} ofpbuf_delete(request_buf);
buf = ofpbuf_new(1024); if (reply) {
dpif_linux_flow_to_ofpbuf(request, buf); if (!error) {
error = dpif_linux_flow_from_ofpbuf(reply, *bufp);
error = ioctl(fd, request->cmd, buf->data) ? errno : 0; }
if (error) { if (error) {
goto error; dpif_linux_flow_init(reply);
} ofpbuf_delete(*bufp);
*bufp = NULL;
if (bufp) {
buf->size = ((struct odp_flow *) buf->data)->len;
error = dpif_linux_flow_from_ofpbuf(reply, buf);
if (error) {
goto error;
} }
*bufp = buf;
} else {
ofpbuf_delete(buf);
}
return 0;
error:
ofpbuf_delete(buf);
if (bufp) {
memset(reply, 0, sizeof *reply);
*bufp = NULL;
} }
return error; return error;
} }