2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-30 22:05:19 +00:00

datapath: Get packet metadata from userspace in odp_packet_cmd_execute().

Until now, the tun_id and in_port have been lost when a packet is sent from
the kernel to userspace and then back to the kernel.  I didn't think that
this was a problem, but recent behavior made me look closer and see that
it makes a difference if sFlow is turned on or if an
ODP_ATTR_ACTION_CONTROLLER action is present.  We could possibly kluge
around those, but for future-proofing it seems better to pass the packet
metadata from userspace to the kernel.  That is what this commit does.

This commit introduces a user-kernel protocol break.  We could avoid that,
if it is desirable, by making ODP_PACKET_ATTR_KEY optional for
ODP_PACKET_CMD_EXECUTE commands.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
This commit is contained in:
Ben Pfaff
2011-06-01 13:39:51 -07:00
parent b85145b59f
commit 80e5eed9c2
9 changed files with 135 additions and 29 deletions

View File

@@ -655,7 +655,8 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
int key_len;
err = -EINVAL;
if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_KEY] ||
!a[ODP_PACKET_ATTR_ACTIONS] ||
nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
goto err;
@@ -694,6 +695,12 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_put;
flow->tbl_node.hash = flow_hash(&flow->key, key_len);
err = flow_metadata_from_nlattrs(&flow->key.eth.in_port,
&flow->key.eth.tun_id,
a[ODP_PACKET_ATTR_KEY]);
if (err)
goto err_flow_put;
acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
err = PTR_ERR(acts);
if (IS_ERR(acts))
@@ -725,6 +732,7 @@ err:
static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
[ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
[ODP_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

View File

@@ -597,6 +597,23 @@ int flow_cmp(const struct tbl_node *node, void *key2_, int len)
return !memcmp(key1, key2, len);
}
/* The size of the argument for each %ODP_KEY_ATTR_* Netlink attribute. */
static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = {
[ODP_KEY_ATTR_TUN_ID] = 8,
[ODP_KEY_ATTR_IN_PORT] = 4,
[ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet),
[ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q),
[ODP_KEY_ATTR_ETHERTYPE] = 2,
[ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4),
[ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6),
[ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp),
[ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp),
[ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp),
[ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6),
[ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp),
[ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd),
};
/**
* flow_from_nlattrs - parses Netlink attributes into a flow key.
* @swkey: receives the extracted flow key.
@@ -625,22 +642,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
prev_type = ODP_KEY_ATTR_UNSPEC;
nla_for_each_nested(nla, attr, rem) {
static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = {
[ODP_KEY_ATTR_TUN_ID] = 8,
[ODP_KEY_ATTR_IN_PORT] = 4,
[ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet),
[ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q),
[ODP_KEY_ATTR_ETHERTYPE] = 2,
[ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4),
[ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6),
[ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp),
[ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp),
[ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp),
[ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6),
[ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp),
[ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd),
};
const struct odp_key_ethernet *eth_key;
const struct odp_key_8021q *q_key;
const struct odp_key_ipv4 *ipv4_key;
@@ -868,6 +869,62 @@ ok:
return error;
}
/**
* flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
* @in_port: receives the extracted input port.
* @tun_id: receives the extracted tunnel ID.
* @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute
* sequence.
*
* This parses a series of Netlink attributes that form a flow key, which must
* take the same form accepted by flow_from_nlattrs(), but only enough of it to
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id,
const struct nlattr *attr)
{
const struct nlattr *nla;
u16 prev_type;
int rem;
*tun_id = 0;
prev_type = ODP_KEY_ATTR_UNSPEC;
nla_for_each_nested(nla, attr, rem) {
int type = nla_type(nla);
if (type > ODP_KEY_ATTR_MAX || nla_len(nla) != key_lens[type])
return -EINVAL;
switch (TRANSITION(prev_type, type)) {
case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_TUN_ID):
*tun_id = nla_get_be64(nla);
break;
case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_IN_PORT):
case TRANSITION(ODP_KEY_ATTR_TUN_ID, ODP_KEY_ATTR_IN_PORT):
if (nla_get_u32(nla) >= DP_MAX_PORTS)
return -EINVAL;
*in_port = nla_get_u32(nla);
break;
default:
goto done;
}
prev_type = type;
}
if (rem)
return -EINVAL;
done:
if (prev_type == ODP_KEY_ATTR_UNSPEC ||
prev_type == ODP_KEY_ATTR_TUN_ID)
return -EINVAL;
return 0;
}
int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
{
struct odp_key_ethernet *eth_key;

View File

@@ -152,6 +152,8 @@ int flow_cmp(const struct tbl_node *, void *target, int len);
int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id,
const struct nlattr *);
static inline struct sw_flow *flow_cast(const struct tbl_node *node)
{

View File

@@ -34,6 +34,7 @@
#include "bitmap.h"
#include "dpif-provider.h"
#include "dynamic-string.h"
#include "netdev.h"
#include "netdev-linux.h"
#include "netdev-vport.h"
@@ -784,6 +785,7 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
static int
dpif_linux_execute(struct dpif *dpif_,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *packet)
{
@@ -801,6 +803,7 @@ dpif_linux_execute(struct dpif *dpif_,
execute->dp_ifindex = dpif->dp_ifindex;
nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size);
nl_msg_put_unspec(buf, ODP_PACKET_ATTR_KEY, key, key_len);
nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len);
error = nl_sock_transact(genl_sock, buf, NULL);

View File

@@ -944,6 +944,7 @@ dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
static int
dpif_netdev_execute(struct dpif *dpif,
const struct nlattr *key_attrs, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *packet)
{
@@ -975,7 +976,10 @@ dpif_netdev_execute(struct dpif *dpif,
* if we don't. */
copy = *packet;
}
flow_extract(&copy, 0, -1, &key);
dpif_netdev_flow_from_nlattrs(key_attrs, key_len, &key);
error = dp_netdev_execute_actions(dp, &copy, &key, actions, actions_len);
if (mutates) {
ofpbuf_uninit(&copy);

View File

@@ -282,9 +282,14 @@ struct dpif_class {
int (*flow_dump_done)(const struct dpif *dpif, void *state);
/* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet
* frame specified in 'packet'. */
int (*execute)(struct dpif *dpif, const struct nlattr *actions,
size_t actions_len, const struct ofpbuf *packet);
* frame specified in 'packet' taken from the flow specified in the
* 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but
* it contains some metadata that cannot be recovered from 'packet', such
* as tun_id and in_port.) */
int (*execute)(struct dpif *dpif,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *packet);
/* Retrieves 'dpif''s "listen mask" into '*listen_mask'. A 1-bit of value
* 2**X set in '*listen_mask' indicates that 'dpif' will receive messages

View File

@@ -914,11 +914,15 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump)
}
/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
* the Ethernet frame specified in 'packet'.
* the Ethernet frame specified in 'packet' taken from the flow specified in
* the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but
* it contains some metadata that cannot be recovered from 'packet', such as
* tun_id and in_port.)
*
* Returns 0 if successful, otherwise a positive errno value. */
int
dpif_execute(struct dpif *dpif,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *buf)
{
@@ -926,7 +930,8 @@ dpif_execute(struct dpif *dpif,
COVERAGE_INC(dpif_execute);
if (actions_len > 0) {
error = dpif->dpif_class->execute(dpif, actions, actions_len, buf);
error = dpif->dpif_class->execute(dpif, key, key_len,
actions, actions_len, buf);
} else {
error = 0;
}

View File

@@ -148,8 +148,10 @@ bool dpif_flow_dump_next(struct dpif_flow_dump *,
const struct dpif_flow_stats **);
int dpif_flow_dump_done(struct dpif_flow_dump *);
int dpif_execute(struct dpif *, const struct nlattr *actions,
size_t actions_len, const struct ofpbuf *);
int dpif_execute(struct dpif *,
const struct nlattr *key, size_t key_len,
const struct nlattr *actions, size_t actions_len,
const struct ofpbuf *);
enum dpif_upcall_type {
DPIF_UC_MISS, /* Miss in flow table. */

View File

@@ -2007,9 +2007,16 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow,
return true;
} else {
struct odputil_keybuf keybuf;
struct ofpbuf key;
int error;
error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet);
ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
odp_flow_key_from_flow(&key, flow);
error = dpif_execute(ofproto->dpif, key.data, key.size,
odp_actions, actions_len, packet);
ofpbuf_delete(packet);
return !error;
}
@@ -2671,12 +2678,20 @@ static int
send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port,
const struct ofpbuf *packet)
{
struct ofpbuf odp_actions;
struct ofpbuf key, odp_actions;
struct odputil_keybuf keybuf;
struct flow flow;
int error;
flow_extract((struct ofpbuf *) packet, 0, 0, &flow);
ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
odp_flow_key_from_flow(&key, &flow);
ofpbuf_init(&odp_actions, 32);
nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, odp_port);
error = dpif_execute(ofproto->dpif, odp_actions.data, odp_actions.size,
error = dpif_execute(ofproto->dpif,
key.data, key.size,
odp_actions.data, odp_actions.size,
packet);
ofpbuf_uninit(&odp_actions);
@@ -3721,13 +3736,18 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
error = validate_actions(ofp_actions, n_ofp_actions, flow,
ofproto->max_ports);
if (!error) {
struct odputil_keybuf keybuf;
struct action_xlate_ctx ctx;
struct ofpbuf *odp_actions;
struct ofpbuf key;
ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
odp_flow_key_from_flow(&key, flow);
action_xlate_ctx_init(&ctx, ofproto, flow, packet);
odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions);
dpif_execute(ofproto->dpif, odp_actions->data, odp_actions->size,
packet);
dpif_execute(ofproto->dpif, key.data, key.size,
odp_actions->data, odp_actions->size, packet);
ofpbuf_delete(odp_actions);
}
return error;