mirror of
https://github.com/openvswitch/ovs
synced 2025-08-31 14:25:26 +00:00
dpif-netdev: user space datapath recirculation
Add basic recirculation infrastructure and user space data path support for it. The following bond mega flow patch will make use of this infrastructure. Signed-off-by: Andy Zhou <azhou@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
@@ -307,11 +307,13 @@ enum ovs_key_attr {
|
||||
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
|
||||
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */
|
||||
OVS_KEY_ATTR_TCP_FLAGS, /* be16 TCP flags. */
|
||||
|
||||
#ifdef __KERNEL__
|
||||
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
|
||||
#endif
|
||||
|
||||
OVS_KEY_ATTR_DP_HASH = 20, /* u32 hash value */
|
||||
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
|
||||
|
||||
OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
|
||||
* The implementation may restrict
|
||||
* the accepted length of the array. */
|
||||
@@ -532,6 +534,29 @@ struct ovs_action_push_vlan {
|
||||
__be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */
|
||||
};
|
||||
|
||||
/* Data path hash algorithm for computing Datapath hash.
|
||||
*
|
||||
* The Algorithm type only specifies the fields in a flow
|
||||
* that will be used as part of the hash. Each datapath is free
|
||||
* to use its own hash algorithm. The hash value will be
|
||||
* opaque to the user space daemon.
|
||||
*/
|
||||
enum ovs_recirc_hash_alg {
|
||||
/* No hash is requested; the userspace datapath leaves dp_hash at 0. */
OVS_RECIRC_HASH_ALG_NONE,
|
||||
/* Hash over L4 flow fields; the userspace datapath computes it with
 * flow_hash_symmetric_l4(). */
OVS_RECIRC_HASH_ALG_L4,
|
||||
};
|
||||
/*
|
||||
* struct ovs_action_recirc - %OVS_ACTION_ATTR_RECIRC action argument.
|
||||
* @recirc_id: The recirculation label. Zero is invalid.
|
||||
* @hash_alg: Algorithm used to compute hash prior to recirculation.
|
||||
* @hash_bias: Bias used when computing the hash prior to recirculation.
|
||||
*/
|
||||
struct ovs_action_recirc {
|
||||
uint32_t hash_alg; /* One of enum ovs_recirc_hash_alg. */
|
||||
uint32_t hash_bias; /* Bias passed to the hash computation. */
|
||||
uint32_t recirc_id; /* Recirculation label. */
|
||||
};
|
||||
|
||||
/**
|
||||
* enum ovs_action_attr - Action types.
|
||||
*
|
||||
@@ -555,6 +580,7 @@ struct ovs_action_push_vlan {
|
||||
* indicate the new packet contents. This could potentially still be
|
||||
* %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there
|
||||
* is no MPLS label stack, as determined by ethertype, no action is taken.
|
||||
* @OVS_ACTION_ATTR_RECIRC: Recirculate within the data path.
|
||||
*
|
||||
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
|
||||
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
|
||||
@@ -571,6 +597,7 @@ enum ovs_action_attr {
|
||||
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
|
||||
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
|
||||
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
|
||||
OVS_ACTION_ATTR_RECIRC, /* struct ovs_action_recirc. */
|
||||
__OVS_ACTION_ATTR_MAX
|
||||
};
|
||||
|
||||
|
@@ -2082,7 +2082,7 @@ struct dp_netdev_execute_aux {
|
||||
|
||||
static void
|
||||
dp_execute_cb(void *aux_, struct ofpbuf *packet,
|
||||
const struct pkt_metadata *md OVS_UNUSED,
|
||||
struct pkt_metadata *md,
|
||||
const struct nlattr *a, bool may_steal)
|
||||
OVS_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
@@ -2114,6 +2114,24 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OVS_ACTION_ATTR_RECIRC: {
|
||||
const struct ovs_action_recirc *act;
|
||||
act = nl_attr_get(a);
|
||||
md->recirc_id =act->recirc_id;
|
||||
md->dp_hash = 0;
|
||||
|
||||
if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
|
||||
struct flow flow;
|
||||
|
||||
flow_extract(packet, md, &flow);
|
||||
md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias);
|
||||
}
|
||||
|
||||
dp_netdev_port_input(aux->dp, packet, md);
|
||||
break;
|
||||
}
|
||||
|
||||
case OVS_ACTION_ATTR_PUSH_VLAN:
|
||||
case OVS_ACTION_ATTR_POP_VLAN:
|
||||
case OVS_ACTION_ATTR_PUSH_MPLS:
|
||||
|
@@ -1108,7 +1108,7 @@ struct dpif_execute_helper_aux {
|
||||
* meaningful. */
|
||||
static void
|
||||
dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
|
||||
const struct pkt_metadata *md,
|
||||
struct pkt_metadata *md,
|
||||
const struct nlattr *action, bool may_steal OVS_UNUSED)
|
||||
{
|
||||
struct dpif_execute_helper_aux *aux = aux_;
|
||||
@@ -1133,6 +1133,7 @@ dpif_execute_helper_cb(void *aux_, struct ofpbuf *packet,
|
||||
case OVS_ACTION_ATTR_SET:
|
||||
case OVS_ACTION_ATTR_SAMPLE:
|
||||
case OVS_ACTION_ATTR_UNSPEC:
|
||||
case OVS_ACTION_ATTR_RECIRC:
|
||||
case __OVS_ACTION_ATTR_MAX:
|
||||
OVS_NOT_REACHED();
|
||||
}
|
||||
|
@@ -125,6 +125,14 @@ odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
|
||||
set_arp(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_arp)));
|
||||
break;
|
||||
|
||||
case OVS_KEY_ATTR_DP_HASH:
|
||||
md->dp_hash = nl_attr_get_u32(a);
|
||||
break;
|
||||
|
||||
case OVS_KEY_ATTR_RECIRC_ID:
|
||||
md->recirc_id = nl_attr_get_u32(a);
|
||||
break;
|
||||
|
||||
case OVS_KEY_ATTR_UNSPEC:
|
||||
case OVS_KEY_ATTR_ENCAP:
|
||||
case OVS_KEY_ATTR_ETHERTYPE:
|
||||
@@ -197,6 +205,7 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal,
|
||||
/* These only make sense in the context of a datapath. */
|
||||
case OVS_ACTION_ATTR_OUTPUT:
|
||||
case OVS_ACTION_ATTR_USERSPACE:
|
||||
case OVS_ACTION_ATTR_RECIRC:
|
||||
if (dp_execute_action) {
|
||||
bool may_steal;
|
||||
/* Allow 'dp_execute_action' to steal the packet data if we do
|
||||
|
@@ -28,7 +28,7 @@ struct ofpbuf;
|
||||
struct pkt_metadata;
|
||||
|
||||
typedef void (*odp_execute_cb)(void *dp, struct ofpbuf *packet,
|
||||
const struct pkt_metadata *,
|
||||
struct pkt_metadata *,
|
||||
const struct nlattr *action, bool may_steal);
|
||||
|
||||
/* Actions that need to be executed in the context of a datapath are handed
|
||||
|
@@ -79,6 +79,7 @@ odp_action_len(uint16_t type)
|
||||
case OVS_ACTION_ATTR_POP_VLAN: return 0;
|
||||
case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls);
|
||||
case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
|
||||
case OVS_ACTION_ATTR_RECIRC: return sizeof(struct ovs_action_recirc);
|
||||
case OVS_ACTION_ATTR_SET: return -2;
|
||||
case OVS_ACTION_ATTR_SAMPLE: return -2;
|
||||
|
||||
@@ -118,6 +119,8 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize)
|
||||
case OVS_KEY_ATTR_ARP: return "arp";
|
||||
case OVS_KEY_ATTR_ND: return "nd";
|
||||
case OVS_KEY_ATTR_MPLS: return "mpls";
|
||||
case OVS_KEY_ATTR_DP_HASH: return "dp_hash";
|
||||
case OVS_KEY_ATTR_RECIRC_ID: return "recirc_id";
|
||||
|
||||
case __OVS_KEY_ATTR_MAX:
|
||||
default:
|
||||
@@ -383,6 +386,19 @@ format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key,
|
||||
}
|
||||
}
|
||||
|
||||
/* Appends to 'ds' a text representation of the recirculation action 'act':
 * "recirc(<recirc_id>)", or "recirc(hash_l4(<hash_bias>), <recirc_id>)" when
 * 'act->hash_alg' is OVS_RECIRC_HASH_ALG_L4.  Both numbers are printed as
 * unsigned decimal. */
static void
|
||||
format_odp_recirc_action(struct ds *ds,
|
||||
const struct ovs_action_recirc *act)
|
||||
{
|
||||
ds_put_format(ds, "recirc(");
|
||||
|
||||
/* The hash parameters are only shown when an L4 hash is requested. */
if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) {
|
||||
ds_put_format(ds, "hash_l4(%"PRIu32"), ", act->hash_bias);
|
||||
}
|
||||
|
||||
ds_put_format(ds, "%"PRIu32")", act->recirc_id);
|
||||
}
|
||||
|
||||
static void
|
||||
format_odp_action(struct ds *ds, const struct nlattr *a)
|
||||
{
|
||||
@@ -405,6 +421,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
|
||||
case OVS_ACTION_ATTR_USERSPACE:
|
||||
format_odp_userspace_action(ds, a);
|
||||
break;
|
||||
case OVS_ACTION_ATTR_RECIRC:
|
||||
format_odp_recirc_action(ds, nl_attr_get(a));
|
||||
break;
|
||||
case OVS_ACTION_ATTR_SET:
|
||||
ds_put_cstr(ds, "set(");
|
||||
format_odp_key_attr(nl_attr_get(a), NULL, NULL, ds, true);
|
||||
@@ -730,6 +749,8 @@ odp_flow_key_attr_len(uint16_t type)
|
||||
case OVS_KEY_ATTR_ENCAP: return -2;
|
||||
case OVS_KEY_ATTR_PRIORITY: return 4;
|
||||
case OVS_KEY_ATTR_SKB_MARK: return 4;
|
||||
case OVS_KEY_ATTR_DP_HASH: return 4;
|
||||
case OVS_KEY_ATTR_RECIRC_ID: return 4;
|
||||
case OVS_KEY_ATTR_TUNNEL: return -2;
|
||||
case OVS_KEY_ATTR_IN_PORT: return 4;
|
||||
case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet);
|
||||
@@ -1025,6 +1046,8 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
|
||||
|
||||
case OVS_KEY_ATTR_PRIORITY:
|
||||
case OVS_KEY_ATTR_SKB_MARK:
|
||||
case OVS_KEY_ATTR_DP_HASH:
|
||||
case OVS_KEY_ATTR_RECIRC_ID:
|
||||
ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a));
|
||||
if (!is_exact) {
|
||||
ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma));
|
||||
@@ -1386,7 +1409,6 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma,
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OVS_KEY_ATTR_UNSPEC:
|
||||
case __OVS_KEY_ATTR_MAX:
|
||||
default:
|
||||
@@ -1618,6 +1640,36 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names,
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t recirc_id;
|
||||
int n = -1;
|
||||
|
||||
if (ovs_scan(s, "recirc_id(%"SCNi32")%n", &recirc_id, &n)) {
|
||||
nl_msg_put_u32(key, OVS_KEY_ATTR_RECIRC_ID, recirc_id);
|
||||
nl_msg_put_u32(mask, OVS_KEY_ATTR_RECIRC_ID, UINT32_MAX);
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t dp_hash;
|
||||
uint32_t dp_hash_mask;
|
||||
int n = -1;
|
||||
|
||||
if (mask && ovs_scan(s, "dp_hash(%"SCNi32"/%"SCNi32")%n", &dp_hash,
|
||||
&dp_hash_mask, &n)) {
|
||||
nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
|
||||
nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, dp_hash_mask);
|
||||
return n;
|
||||
} else if (ovs_scan(s, "dp_hash(%"SCNi32")%n", &dp_hash, &n)) {
|
||||
nl_msg_put_u32(key, OVS_KEY_ATTR_DP_HASH, dp_hash);
|
||||
if (mask) {
|
||||
nl_msg_put_u32(mask, OVS_KEY_ATTR_DP_HASH, UINT32_MAX);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
uint64_t tun_id, tun_id_mask;
|
||||
struct flow_tnl tun_key, tun_key_mask;
|
||||
@@ -2438,6 +2490,14 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data,
|
||||
|
||||
nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark);
|
||||
|
||||
if (flow->recirc_id) {
|
||||
nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id);
|
||||
}
|
||||
|
||||
if (flow->dp_hash) {
|
||||
nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash);
|
||||
}
|
||||
|
||||
/* Add an ingress port attribute if this is a mask or 'odp_in_port'
|
||||
* is not the magical value "ODPP_NONE". */
|
||||
if (is_mask || odp_in_port != ODPP_NONE) {
|
||||
@@ -2673,13 +2733,24 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (type == OVS_KEY_ATTR_PRIORITY) {
|
||||
switch (type) {
|
||||
case OVS_KEY_ATTR_RECIRC_ID:
|
||||
md->recirc_id = nl_attr_get_u32(nla);
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_RECIRC_ID);
|
||||
break;
|
||||
case OVS_KEY_ATTR_DP_HASH:
|
||||
md->dp_hash = nl_attr_get_u32(nla);
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_DP_HASH);
|
||||
break;
|
||||
case OVS_KEY_ATTR_PRIORITY:
|
||||
md->skb_priority = nl_attr_get_u32(nla);
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_PRIORITY);
|
||||
} else if (type == OVS_KEY_ATTR_SKB_MARK) {
|
||||
break;
|
||||
case OVS_KEY_ATTR_SKB_MARK:
|
||||
md->pkt_mark = nl_attr_get_u32(nla);
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK);
|
||||
} else if (type == OVS_KEY_ATTR_TUNNEL) {
|
||||
break;
|
||||
case OVS_KEY_ATTR_TUNNEL: {
|
||||
enum odp_key_fitness res;
|
||||
|
||||
res = odp_tun_key_from_attr(nla, &md->tunnel);
|
||||
@@ -2688,9 +2759,14 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len,
|
||||
} else if (res == ODP_FIT_PERFECT) {
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_TUNNEL);
|
||||
}
|
||||
} else if (type == OVS_KEY_ATTR_IN_PORT) {
|
||||
break;
|
||||
}
|
||||
case OVS_KEY_ATTR_IN_PORT:
|
||||
md->in_port.odp_port = nl_attr_get_odp_port(nla);
|
||||
wanted_attrs &= ~(1u << OVS_KEY_ATTR_IN_PORT);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!wanted_attrs) {
|
||||
@@ -3226,6 +3302,18 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
|
||||
expected_attrs = 0;
|
||||
|
||||
/* Metadata. */
|
||||
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) {
|
||||
flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]);
|
||||
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID;
|
||||
} else if (is_mask) {
|
||||
/* Always exact match recirc_id when datapath does not specify it. */
|
||||
flow->recirc_id = UINT32_MAX;
|
||||
}
|
||||
|
||||
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_DP_HASH)) {
|
||||
flow->dp_hash = nl_attr_get_u32(attrs[OVS_KEY_ATTR_DP_HASH]);
|
||||
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_DP_HASH;
|
||||
}
|
||||
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) {
|
||||
flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]);
|
||||
expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY;
|
||||
|
@@ -33,6 +33,11 @@ struct ds;
|
||||
|
||||
/* Datapath packet metadata */
|
||||
struct pkt_metadata {
|
||||
uint32_t recirc_id; /* Recirculation id carried with the
|
||||
recirculating packets. 0 for packets
|
||||
received from the wire. */
|
||||
uint32_t dp_hash; /* hash value computed by the recirculation
|
||||
action. */
|
||||
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
|
||||
uint32_t skb_priority; /* Packet priority for QoS. */
|
||||
uint32_t pkt_mark; /* Packet mark. */
|
||||
@@ -40,13 +45,15 @@ struct pkt_metadata {
|
||||
};
|
||||
|
||||
#define PKT_METADATA_INITIALIZER(PORT) \
|
||||
(struct pkt_metadata){ { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
|
||||
(struct pkt_metadata){ 0, 0, { 0, 0, 0, 0, 0, 0}, 0, 0, {(PORT)} }
|
||||
|
||||
static inline struct pkt_metadata
|
||||
pkt_metadata_from_flow(const struct flow *flow)
|
||||
{
|
||||
struct pkt_metadata md;
|
||||
|
||||
md.recirc_id = flow->recirc_id;
|
||||
md.dp_hash = flow->dp_hash;
|
||||
md.tunnel = flow->tunnel;
|
||||
md.skb_priority = flow->skb_priority;
|
||||
md.pkt_mark = flow->pkt_mark;
|
||||
|
@@ -135,6 +135,64 @@ void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);
|
||||
|
||||
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);
|
||||
|
||||
/*
|
||||
* Recirculation
|
||||
* =============
|
||||
*
|
||||
* Recirculation is a technique to allow a frame to re-enter the packet processing
|
||||
* path one or more times to achieve more flexible packet processing in the
|
||||
* data path, e.g. for MPLS handling or for selecting the slave port of a bond.
|
||||
*
|
||||
* Data path and user space interface
|
||||
* -----------------------------------
|
||||
*
|
||||
* Two new fields, recirc_id and dp_hash, are added to the current flow data structure.
|
||||
* They are both of type uint32_t. In addition, a new action, RECIRC, is added.
|
||||
*
|
||||
* The value recirc_id is used to distinguish a packet from multiple iterations of
|
||||
* recirculation. A packet initially received is considered to have a recirc_id of 0.
|
||||
* Recirc_id is managed by the user space, opaque to the data path.
|
||||
*
|
||||
* On the other hand, dp_hash can only be computed by the data path, opaque to
|
||||
* the user space. In fact, user space may not be able to recompute the hash value.
|
||||
* The dp_hash value should be wildcarded for a newly received packet.
|
||||
* The RECIRC action specifies whether the hash is computed and, if so, which
|
||||
* fields are included in the hash computation. The computed hash value is
|
||||
* stored into the dp_hash field prior to recirculation.
|
||||
*
|
||||
* The RECIRC action computes and sets the dp_hash field, sets the recirc_id field
|
||||
* and then reprocesses the packet as if it was received on the same input port.
|
||||
* The RECIRC action works like a function call; actions listed after the RECIRC
|
||||
* action will be executed after it completes. RECIRC actions can be nested; the
|
||||
* data path implementation limits the number of recirculations executed
|
||||
* to prevent unreasonable nesting depth or infinite loops.
|
||||
*
|
||||
* Both flow fields and the RECIRC action are exposed as open flow fields via
|
||||
* Nicira extensions.
|
||||
*
|
||||
* Post recirculation flow
|
||||
* ------------------------
|
||||
*
|
||||
* At the open flow level, post recirculation rules are always hidden from the
|
||||
* controller. They are installed in table 254 which is set up as a hidden table
|
||||
* during boot time. Those rules are managed by the local user space program only.
|
||||
*
|
||||
* To speed up the classifier look up process, recirc_id is always reflected into
|
||||
* the metadata field, since recirc_id is required to be exactly matched.
|
||||
*
|
||||
* Classifier look up always starts with table 254. A post recirculation flow
|
||||
* lookup should find its hidden rule within this table. On the other hand, a
|
||||
* newly received packet should miss all post recirculation rules because its
|
||||
* recirc_id is zero, then hit a pre-installed lower priority rule to redirect
|
||||
* classifier to look up starting from table 0:
|
||||
*
|
||||
* * , actions=resubmit(,0)
|
||||
*
|
||||
* Post recirculation data path flows are managed like other data path flows.
|
||||
* They are created on demand. Miss handling, stats collection and revalidation
|
||||
* work the same way as regular flows.
|
||||
*/
|
||||
|
||||
uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto);
|
||||
void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id);
|
||||
#endif /* ofproto-dpif.h */
|
||||
|
Reference in New Issue
Block a user