2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 14:25:26 +00:00

dpif-netdev: user space datapath recirculation

Add basic recirculation infrastructure and user space
data path support for it. The following bond mega flow patch will
make use of this infrastructure.

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
Andy Zhou
2014-03-04 15:36:03 -08:00
parent f537461746
commit 572f732ab0
8 changed files with 218 additions and 10 deletions

View File

@@ -307,11 +307,13 @@ enum ovs_key_attr {
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
#endif
OVS_KEY_ATTR_DP_HASH = 20, /* u32 hash value */
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
@@ -532,6 +534,29 @@ struct ovs_action_push_vlan {
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
};
/* Data path hash algorithm selector for computing the datapath hash.
*
* The algorithm type only specifies which fields of a flow
* are used as input to the hash. Each datapath is free
* to use its own hash algorithm over those fields. The hash
* value is opaque to the user space daemon.
*/
enum ovs_recirc_hash_alg {
OVS_RECIRC_HASH_ALG_NONE, /* No hash is computed before recirculation. */
OVS_RECIRC_HASH_ALG_L4, /* Hash computed over the flow's L4 fields. */
};
/**
* struct ovs_action_recirc - %OVS_ACTION_ATTR_RECIRC action argument.
* @hash_alg: Algorithm used to compute the hash prior to recirculation;
* one of enum ovs_recirc_hash_alg.
* @hash_bias: Bias used when computing the hash prior to recirculation.
* @recirc_id: The recirculation label. Zero is invalid.
*/
struct ovs_action_recirc {
uint32_t hash_alg; /* One of enum ovs_recirc_hash_alg. */
uint32_t hash_bias;
uint32_t recirc_id; /* Recirculation label. */
};
/**
* enum ovs_action_attr - Action types.
*
@@ -555,6 +580,7 @@ struct ovs_action_push_vlan {
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
* is no MPLS label stack, as determined by ethertype, no action is taken.
* @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -571,6 +597,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
OVS_ACTION_ATTR_RECIRC, /* struct ovs_action_recirc. */
__OVS_ACTION_ATTR_MAX
};

View File

@@ -2082,7 +2082,7 @@ struct dp_netdev_execute_aux {
static void
dp_execute_cb(void *aux_, struct ofpbuf *packet,
const struct pkt_metadata *md OVS_UNUSED,
struct pkt_metadata *md,
const struct nlattr *a, bool may_steal)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
@@ -2114,6 +2114,24 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
}
break;
}
case OVS_ACTION_ATTR_RECIRC: {
const struct ovs_action_recirc *act;
act = nl_attr_get(a);
md->recirc_id =act->recirc_id;
md->dp_hash = 0;
if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
struct flow flow;
flow_extract(packet, md, &flow);
md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
}
dp_netdev_port_input(aux->dp, packet, md);
break;
}
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
case OVS_ACTION_ATTR_PUSH_MPLS:

View File

@@ -1108,7 +1108,7 @@ struct dpif_execute_helper_aux {
* meaningful. */
static void
dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
const struct pkt_metadata *md,
struct pkt_metadata *md,
const struct nlattr *action, bool may_steal OVS_UNUSED)
{
struct dpif_execute_helper_aux *aux = aux_;
@@ -1133,6 +1133,7 @@ dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
case OVS_ACTION_ATTR_SET:
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_UNSPEC:
case OVS_ACTION_ATTR_RECIRC:
case __OVS_ACTION_ATTR_MAX:
OVS_NOT_REACHED();
}

View File

@@ -125,6 +125,14 @@ odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
set_arp(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_arp)));
break;
case OVS_KEY_ATTR_DP_HASH:
md->dp_hash = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_RECIRC_ID:
md->recirc_id = nl_attr_get_u32(a);
break;
case OVS_KEY_ATTR_UNSPEC:
case OVS_KEY_ATTR_ENCAP:
case OVS_KEY_ATTR_ETHERTYPE:
@@ -197,6 +205,7 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
if (dp_execute_action) {
bool may_steal;
/* Allow 'dp_execute_action' to steal the packet data if we do

View File

@@ -28,7 +28,7 @@ struct ofpbuf;
struct pkt_metadata;
typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet,
const struct pkt_metadata *,
struct pkt_metadata *,
const struct nlattr *action, bool may_steal);
/* Actions that need to be executed in the context of a datapath are handed

View File

@@ -79,6 +79,7 @@ odp_action_len(uint16_t type)
case OVS_ACTION_ATTR_POP_VLAN: return 0;
case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls);
case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
case OVS_ACTION_ATTR_RECIRC: return sizeof(struct ovs_action_recirc);
case OVS_ACTION_ATTR_SET: return -2;
case OVS_ACTION_ATTR_SAMPLE: return -2;
@@ -118,6 +119,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize)
case OVS_KEY_ATTR_ARP: return "arp";
case OVS_KEY_ATTR_ND: return "nd";
case OVS_KEY_ATTR_MPLS: return "mpls";
case OVS_KEY_ATTR_DP_HASH: return "dp_hash";
case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id";
case __OVS_KEY_ATTR_MAX:
default:
@@ -383,6 +386,19 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key,
}
}
/* Appends a textual form of the recirculation action 'act' to 'ds'.
 *
 * The output is "recirc(<recirc_id>)", or
 * "recirc(hash_l4(<hash_bias>), <recirc_id>)" when the action requests the
 * L4 hash algorithm. */
static void
format_odp_recirc_action(struct ds *ds,
                         const struct ovs_action_recirc *act)
{
    const uint32_t label = act->recirc_id;

    ds_put_format(ds, "recirc(");

    /* Only the L4 algorithm has a printable hash clause; every other
     * algorithm value prints nothing here. */
    switch (act->hash_alg) {
    case OVS_RECIRC_HASH_ALG_L4:
        ds_put_format(ds, "hash_l4(%"PRIu32"), ", act->hash_bias);
        break;
    default:
        break;
    }

    ds_put_format(ds, "%"PRIu32")", label);
}
static void
format_odp_action(struct ds *ds, const struct nlattr *a)
{
@@ -405,6 +421,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
case OVS_ACTION_ATTR_USERSPACE:
format_odp_userspace_action(ds, a);
break;
case OVS_ACTION_ATTR_RECIRC:
format_odp_recirc_action(ds, nl_attr_get(a));
break;
case OVS_ACTION_ATTR_SET:
ds_put_cstr(ds, "set(");
format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true);
@@ -730,6 +749,8 @@ odp_flow_key_attr_len(uint16_t type)
case OVS_KEY_ATTR_ENCAP: return -2;
case OVS_KEY_ATTR_PRIORITY: return 4;
case OVS_KEY_ATTR_SKB_MARK: return 4;
case OVS_KEY_ATTR_DP_HASH: return 4;
case OVS_KEY_ATTR_RECIRC_ID: return 4;
case OVS_KEY_ATTR_TUNNEL: return -2;
case OVS_KEY_ATTR_IN_PORT: return 4;
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
@@ -1025,6 +1046,8 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_DP_HASH:
case OVS_KEY_ATTR_RECIRC_ID:
ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a));
if (!is_exact) {
ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
@@ -1386,7 +1409,6 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
}
break;
}
case OVS_KEY_ATTR_UNSPEC:
case __OVS_KEY_ATTR_MAX:
default:
@@ -1618,6 +1640,36 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names,
}
}
{
    /* Parses "recirc_id(<id>)" and appends it to 'key' as an
     * OVS_KEY_ATTR_RECIRC_ID attribute.  recirc_id is always matched
     * exactly, so when a mask buffer is supplied an all-ones mask is
     * appended to it.
     *
     * 'mask' may be null when the caller only wants the key (the dp_hash
     * parser in this function guards against that as well), so it must
     * not be written to unconditionally. */
    uint32_t recirc_id;
    int n = -1;

    if (ovs_scan(s, "recirc_id(%"SCNi32")%n", &recirc_id, &n)) {
        nl_msg_put_u32(key, OVS_KEY_ATTR_RECIRC_ID, recirc_id);
        if (mask) {
            nl_msg_put_u32(mask, OVS_KEY_ATTR_RECIRC_ID, UINT32_MAX);
        }
        return n;
    }
}
{
uint32_t dp_hash;
uint32_t dp_hash_mask;
int n = -1;
if (mask && ovs_scan(s, "dp_hash(%"SCNi32"/%"SCNi32")%n", &dp_hash,
&dp_hash_mask, &n)) {
nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, dp_hash_mask);
return n;
} else if (ovs_scan(s, "dp_hash(%"SCNi32")%n", &dp_hash, &n)) {
nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
if (mask) {
nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, UINT32_MAX);
}
return n;
}
}
{
uint64_t tun_id, tun_id_mask;
struct flow_tnl tun_key, tun_key_mask;
@@ -2438,6 +2490,14 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
if (flow->recirc_id) {
nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id);
}
if (flow->dp_hash) {
nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash);
}
/* Add an ingress port attribute if this is a mask or 'odp_in_port'
* is not the magical value "ODPP_NONE". */
if (is_mask || odp_in_port != ODPP_NONE) {
@@ -2673,13 +2733,24 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
continue;
}
if (type == OVS_KEY_ATTR_PRIORITY) {
switch (type) {
case OVS_KEY_ATTR_RECIRC_ID:
md->recirc_id = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_RECIRC_ID);
break;
case OVS_KEY_ATTR_DP_HASH:
md->dp_hash = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_DP_HASH);
break;
case OVS_KEY_ATTR_PRIORITY:
md->skb_priority = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_PRIORITY);
} else if (type == OVS_KEY_ATTR_SKB_MARK) {
break;
case OVS_KEY_ATTR_SKB_MARK:
md->pkt_mark = nl_attr_get_u32(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK);
} else if (type == OVS_KEY_ATTR_TUNNEL) {
break;
case OVS_KEY_ATTR_TUNNEL: {
enum odp_key_fitness res;
res = odp_tun_key_from_attr(nla, &md->tunnel);
@@ -2688,9 +2759,14 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
} else if (res == ODP_FIT_PERFECT) {
wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL);
}
} else if (type == OVS_KEY_ATTR_IN_PORT) {
break;
}
case OVS_KEY_ATTR_IN_PORT:
md->in_port.odp_port = nl_attr_get_odp_port(nla);
wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT);
break;
default:
break;
}
if (!wanted_attrs) {
@@ -3226,6 +3302,18 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
expected_attrs = 0;
/* Metadata. */
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) {
flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID;
} else if (is_mask) {
/* Always exact-match recirc_id when the datapath does not specify it. */
flow->recirc_id = UINT32_MAX;
}
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_DP_HASH)) {
flow->dp_hash = nl_attr_get_u32(attrs[OVS_KEY_ATTR_DP_HASH]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_DP_HASH;
}
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) {
flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]);
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY;

View File

@@ -33,6 +33,11 @@ struct ds;
/* Datapath packet metadata */
struct pkt_metadata {
uint32_t recirc_id; /* Recirculation id carried with the
recirculating packets. 0 for packets
received from the wire. */
uint32_t dp_hash; /* hash value computed by the recirculation
action. */
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
@@ -40,13 +45,15 @@ struct pkt_metadata {
};
#define PKT_METADATA_INITIALIZER(PORT) \
(struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
(struct pkt_metadata){ 0, 0, { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
static inline struct pkt_metadata
pkt_metadata_from_flow(const struct flow *flow)
{
struct pkt_metadata md;
md.recirc_id = flow->recirc_id;
md.dp_hash = flow->dp_hash;
md.tunnel = flow->tunnel;
md.skb_priority = flow->skb_priority;
md.pkt_mark = flow->pkt_mark;

View File

@@ -135,6 +135,64 @@ void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
/*
* Recirculation
* =============
*
* Recirculation is a technique that allows a frame to re-enter the packet
* processing path one or more times to achieve more flexible packet processing
* in the data path. Example uses are MPLS handling and selecting the slave
* port of a bond.
*
* Data path and user space interface
* -----------------------------------
*
* Two new fields, recirc_id and dp_hash, are added to the current flow data structure.
* They are both of type uint32_t. In addition, a new action, RECIRC, is added.
*
* The value of recirc_id is used to distinguish a packet across multiple iterations
* of recirculation. A packet initially received is considered to have a recirc_id
* of 0. Recirc_id is managed by user space and is opaque to the data path.
*
* On the other hand, dp_hash can only be computed by the data path and is opaque
* to user space. In fact, user space may not be able to recompute the hash value.
* The dp_hash value should be wildcarded for a newly received packet.
* The RECIRC action specifies whether the hash is computed and, if so, which
* fields are included in the hash computation. The computed hash value is
* stored into the dp_hash field prior to recirculation.
*
* The RECIRC action computes and sets the dp_hash field, sets the recirc_id field,
* and then reprocesses the packet as if it had been received on the same input port.
* The RECIRC action works like a function call; actions listed after the RECIRC
* action are executed once it returns. RECIRC actions can be nested; the
* data path implementation limits the number of recirculations executed
* to prevent unreasonable nesting depth or infinite loops.
*
* Both flow fields and the RECIRC action are exposed as open flow fields via
* Nicira extensions.
*
* Post recirculation flow
* ------------------------
*
* At the open flow level, post recirculation rules are always hidden from the
* controller. They are installed in table 254 which is set up as a hidden table
* during boot time. Those rules are managed by the local user space program only.
*
* To speed up the classifier look up process, recirc_id is always reflected into
* the metadata field, since recirc_id is required to be exactly matched.
*
* Classifier lookup always starts with table 254. A post recirculation flow
* lookup should find its hidden rule within this table. On the other hand, a
* newly received packet should miss all post recirculation rules because its
* recirc_id is zero, and then hit a pre-installed lower priority rule that
* redirects the classifier to start its lookup from table 0:
*
* * , actions=resubmit(,0)
*
* Post recirculation data path flows are managed like other data path flows.
* They are created on demand. Miss handling, stats collection and revalidation
* work the same way as regular flows.
*/
/* Recirculation ID management for 'ofproto'.
*
* NOTE(review): the implementations are not visible from this header.
* Presumably ofproto_dpif_alloc_recirc_id() returns a nonzero ID (0 denotes
* a packet that has not been recirculated) and
* ofproto_dpif_free_recirc_id() releases an ID previously obtained from it;
* confirm against the definitions in ofproto-dpif.c. */
uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
#endif /* ofproto-dpif.h */