diff --git a/datapath/actions.c b/datapath/actions.c index 32e44c040..cba12e66c 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -28,8 +28,8 @@ #include "vlan.h" #include "vport.h" -static int do_execute_actions(struct datapath *, struct sk_buff *, - struct sw_flow_actions *acts); +static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr, int len, bool keep_skb); static int make_writable(struct sk_buff *skb, int write_len) { @@ -255,15 +255,37 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, u64 arg) upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = arg; - upcall.sample_pool = 0; - upcall.actions = NULL; - upcall.actions_len = 0; return dp_upcall(dp, skb, &upcall); } +static int sample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + const struct nlattr *acts_list = NULL; + const struct nlattr *a; + int rem; + + for (a = nla_data(attr), rem = nla_len(attr); rem > 0; + a = nla_next(a, &rem)) { + switch (nla_type(a)) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (net_random() >= nla_get_u32(a)) + return 0; + break; + + case OVS_SAMPLE_ATTR_ACTIONS: + acts_list = a; + break; + } + } + + return do_execute_actions(dp, skb, nla_data(acts_list), + nla_len(acts_list), true); +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_actions *acts) + const struct nlattr *attr, int len, bool keep_skb) { /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and @@ -274,7 +296,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a; int rem; - for (a = acts->actions, rem = acts->actions_len; rem > 0; + for (a = attr, rem = len; rem > 0; a = nla_next(a, &rem)) { int err = 0; @@ -339,49 +361,29 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_PRIORITY: skb->priority = priority; break; - } + case OVS_ACTION_ATTR_SAMPLE: + err = sample(dp, skb, a); + break; + + } if (unlikely(err)) { kfree_skb(skb); return err; } } - if (prev_port != -1) + if (prev_port != -1) { + if (keep_skb) + skb = skb_clone(skb, GFP_ATOMIC); + do_output(dp, skb, prev_port); - else + } else if (!keep_skb) consume_skb(skb); return 0; } -static void sflow_sample(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_actions *acts) -{ - struct sk_buff *nskb; - struct vport *p = OVS_CB(skb)->vport; - struct dp_upcall_info upcall; - - if (unlikely(!p)) - return; - - atomic_inc(&p->sflow_pool); - if (net_random() >= dp->sflow_probability) - return; - - nskb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!nskb)) - return; - - upcall.cmd = OVS_PACKET_CMD_SAMPLE; - upcall.key = &OVS_CB(skb)->flow->key; - upcall.userdata = 0; - upcall.sample_pool = atomic_read(&p->sflow_pool); - upcall.actions = acts->actions; - upcall.actions_len = acts->actions_len; - dp_upcall(dp, nskb, &upcall); -} - /* Execute a list of actions against 'skb'. */ int execute_actions(struct datapath *dp, struct sk_buff *skb) { @@ -399,11 +401,9 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb) goto out_loop; } - /* Really execute actions. */ - if (dp->sflow_probability) - sflow_sample(dp, skb, acts); OVS_CB(skb)->tun_id = 0; - error = do_execute_actions(dp, skb, acts); + error = do_execute_actions(dp, skb, acts->actions, + acts->actions_len, false); /* Check whether sub-actions looped too much. */ if (unlikely(loop->looping)) diff --git a/datapath/datapath.c b/datapath/datapath.c index 4d40ac3bd..63a393221 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -310,10 +310,6 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; - upcall.userdata = 0; - upcall.sample_pool = 0; - upcall.actions = NULL; - upcall.actions_len = 0; dp_upcall(dp, skb, &upcall); stats_counter_off = offsetof(struct dp_stats_percpu, n_missed); goto out; @@ -453,12 +449,8 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid, len = sizeof(struct ovs_header); len += nla_total_size(skb->len); len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->userdata) + if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) len += nla_total_size(8); - if (upcall_info->sample_pool) - len += nla_total_size(4); - if (upcall_info->actions_len) - len += nla_total_size(upcall_info->actions_len); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { @@ -473,18 +465,9 @@ static int queue_userspace_packets(struct datapath *dp, u32 pid, flow_to_nlattrs(upcall_info->key, user_skb); nla_nest_end(user_skb, nla); - if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, upcall_info->userdata); - if (upcall_info->sample_pool) - nla_put_u32(user_skb, OVS_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool); - if (upcall_info->actions_len) { - const struct nlattr *actions = upcall_info->actions; - u32 actions_len = upcall_info->actions_len; - - nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); - memcpy(__skb_put(user_skb, actions_len), actions, actions_len); - nla_nest_end(user_skb, nla); - } + if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) + nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, + upcall_info->userdata); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -532,10 +515,37 @@ static int flush_flows(int dp_ifindex) return 0; } -static int validate_actions(const struct nlattr *attr) +static int validate_actions(const struct nlattr *attr, int depth); + +static int validate_sample(const struct nlattr *attr, int depth) +{ + static const struct nla_policy sample_policy[OVS_SAMPLE_ATTR_MAX + 1] = + { + [OVS_SAMPLE_ATTR_PROBABILITY] = {.type = NLA_U32 }, + [OVS_SAMPLE_ATTR_ACTIONS] = {.type = NLA_UNSPEC }, + }; + struct nlattr *a[OVS_SAMPLE_ATTR_MAX + 1]; + int error; + + error = nla_parse_nested(a, OVS_SAMPLE_ATTR_MAX, attr, sample_policy); + if (error) + return error; + + if (!a[OVS_SAMPLE_ATTR_PROBABILITY]) + return -EINVAL; + if (!a[OVS_SAMPLE_ATTR_ACTIONS]) + return -EINVAL; + + return validate_actions(a[OVS_SAMPLE_ATTR_ACTIONS], (depth + 1)); +} + +static int validate_actions(const struct nlattr *attr, int depth) { const struct nlattr *a; - int rem; + int rem, err; + + if (depth >= SAMPLE_ACTION_DEPTH) + return -EOVERFLOW; nla_for_each_nested(a, attr, rem) { static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -556,7 +566,12 @@ static int validate_actions(const struct nlattr *attr) }; int type = nla_type(a); - if (type > OVS_ACTION_ATTR_MAX || nla_len(a) != action_lens[type]) + /* Match expected attr len for given attr len except for + * OVS_ACTION_ATTR_SAMPLE, as it has nested actions list which + * is variable size. */ + if (type > OVS_ACTION_ATTR_MAX || + (nla_len(a) != action_lens[type] && + type != OVS_ACTION_ATTR_SAMPLE)) return -EINVAL; switch (type) { @@ -592,6 +607,12 @@ static int validate_actions(const struct nlattr *attr) return -EINVAL; break; + case OVS_ACTION_ATTR_SAMPLE: + err = validate_sample(a, depth); + if (err) + return err; + break; + default: return -EOPNOTSUPP; } @@ -630,7 +651,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) goto err; - err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS]); + err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], 0); if (err) goto err; @@ -897,7 +918,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS]); + error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], 0); if (error) goto error; } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { @@ -1158,7 +1179,6 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { #endif [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 }, - [OVS_DP_ATTR_SAMPLING] = { .type = NLA_U32 }, }; static struct genl_family dp_datapath_genl_family = { @@ -1201,9 +1221,6 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, NLA_PUT_U32(skb, OVS_DP_ATTR_IPV4_FRAGS, dp->drop_frags ? OVS_DP_FRAG_DROP : OVS_DP_FRAG_ZERO); - if (dp->sflow_probability) - NLA_PUT_U32(skb, OVS_DP_ATTR_SAMPLING, dp->sflow_probability); - return genlmsg_end(skb, ovs_header); nla_put_failure: @@ -1265,8 +1282,6 @@ static void change_datapath(struct datapath *dp, struct nlattr *a[OVS_DP_ATTR_MA { if (a[OVS_DP_ATTR_IPV4_FRAGS]) dp->drop_frags = nla_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]) == OVS_DP_FRAG_DROP; - if (a[OVS_DP_ATTR_SAMPLING]) - dp->sflow_probability = nla_get_u32(a[OVS_DP_ATTR_SAMPLING]); } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) diff --git a/datapath/datapath.h b/datapath/datapath.h index 03bebd1ae..ea200a3ad 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -34,6 +34,8 @@ struct vport; #define DP_MAX_PORTS 1024 +#define SAMPLE_ACTION_DEPTH 3 + /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. @@ -68,9 +70,6 @@ struct dp_stats_percpu { * @port_list: List of all ports in @ports in arbitrary order. RTNL required * to iterate or modify. * @stats_percpu: Per-CPU datapath statistics. - * @sflow_probability: Number of packets out of UINT_MAX to sample to the - * %OVS_PACKET_CMD_SAMPLE upcall, e.g. (@sflow_probability/UINT_MAX) - * is the probability of sampling a given packet. * * Context: See the comment on locking at the top of datapath.c for additional * locking information. @@ -91,9 +90,6 @@ struct datapath { /* Stats. */ struct dp_stats_percpu __percpu *stats_percpu; - - /* sFlow Sampling */ - unsigned int sflow_probability; }; /** @@ -127,18 +123,13 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: Becomes %OVS_PACKET_ATTR_USERDATA if nonzero. - * @sample_pool: Becomes %OVS_PACKET_ATTR_SAMPLE_POOL if nonzero. - * @actions: Becomes %OVS_PACKET_ATTR_ACTIONS if nonnull. - * @actions_len: Number of bytes in @actions. -*/ + * @userdata: Is passed to user-space as %OVS_PACKET_ATTR_USERDATA if @cmd is + * %OVS_PACKET_CMD_ACTION. + */ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; u64 userdata; - u32 sample_pool; - const struct nlattr *actions; - u32 actions_len; }; extern struct notifier_block dp_device_notifier; diff --git a/datapath/vport.c b/datapath/vport.c index 4e922e49e..8826e75db 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -177,7 +177,6 @@ struct vport *vport_alloc(int priv_size, const struct vport_ops *ops, const stru vport->dp = parms->dp; vport->port_no = parms->port_no; vport->upcall_pid = parms->upcall_pid; - atomic_set(&vport->sflow_pool, 0); vport->ops = ops; /* Initialize kobject for bridge. This will be added as diff --git a/datapath/vport.h b/datapath/vport.h index 61da43996..b6b94e010 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -80,8 +80,6 @@ struct vport_err_stats { * &struct vport. (We keep this around so that we can delete it if the * device gets renamed.) Set to the null string when no link exists. * @node: Element in @dp's @port_list. - * @sflow_pool: Number of packets that were candidates for sFlow sampling, - * regardless of whether they were actually chosen and sent down to userspace. * @upcall_pid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. @@ -99,7 +97,6 @@ struct vport { struct kobject kobj; char linkname[IFNAMSIZ]; struct list_head node; - atomic_t sflow_pool; u32 upcall_pid; struct hlist_node hash_node; diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index c48426f5d..3db960a9c 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -94,10 +94,6 @@ struct ovs_header { * @OVS_DP_ATTR_IPV4_FRAGS: One of %OVS_DP_FRAG_*. Always present in * notifications. May be included in %OVS_DP_NEW or %OVS_DP_SET requests to * change the fragment handling policy. - * @OVS_DP_ATTR_SAMPLING: 32-bit fraction of packets to sample with - * @OVS_PACKET_CMD_SAMPLE. A value of 0 samples no packets, a value of - * %UINT32_MAX samples all packets, and intermediate values sample intermediate - * fractions of packets. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. @@ -108,7 +104,6 @@ enum ovs_datapath_attr { OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ OVS_DP_ATTR_IPV4_FRAGS, /* 32-bit enum ovs_frag_handling */ - OVS_DP_ATTR_SAMPLING, /* 32-bit fraction of packets to sample. */ __OVS_DP_ATTR_MAX }; @@ -157,7 +152,6 @@ enum ovs_packet_cmd { /* Kernel-to-user notifications. */ OVS_PACKET_CMD_MISS, /* Flow table miss. */ OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ - OVS_PACKET_CMD_SAMPLE, /* Sampled packet. */ /* User commands. */ OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ @@ -174,6 +168,8 @@ enum ovs_packet_cmd { * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows * userspace to adapt its flow setup strategy by comparing its notion of the * flow key against the kernel's. + * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used + * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. * @OVS_PACKET_ATTR_UPCALL_PID: Optionally present for OVS_PACKET_CMD_EXECUTE. * The Netlink socket in userspace that OVS_PACKET_CMD_USERSPACE and * OVS_PACKET_CMD_SAMPLE upcalls will be directed to for actions triggered by @@ -181,11 +177,6 @@ enum ovs_packet_cmd { * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE, action's argument was * nonzero. - * @OVS_PACKET_ATTR_SAMPLE_POOL: Present for %OVS_PACKET_CMD_SAMPLE. Contains - * the number of packets processed so far that were candidates for sampling. - * @OVS_PACKET_ATTR_ACTIONS: Present for %OVS_PACKET_CMD_SAMPLE. Contains a - * copy of the actions applied to the packet, as nested %OVS_ACTION_ATTR_* - * attributes. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. @@ -194,10 +185,9 @@ enum ovs_packet_attr { OVS_PACKET_ATTR_UNSPEC, OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ + OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls. */ OVS_PACKET_ATTR_USERDATA, /* u64 OVS_ACTION_ATTR_USERSPACE arg. */ - OVS_PACKET_ATTR_SAMPLE_POOL, /* # sampling candidate packets so far. */ - OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_PACKET_ATTR_MAX }; @@ -425,6 +415,24 @@ enum ovs_flow_attr { #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) +/** + * enum ovs_sample_attr - Attributes for OVS_ACTION_ATTR_SAMPLE + * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with + * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of + * %UINT32_MAX samples all packets and intermediate values sample intermediate + * fractions of packets. + * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. + * Actions are passed as nested attributes. + */ +enum ovs_sample_attr { + OVS_SAMPLE_ATTR_UNSPEC, + OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ + OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ + __OVS_SAMPLE_ATTR_MAX, +}; + +#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) + /* Action types. */ enum ovs_action_type { OVS_ACTION_ATTR_UNSPEC, @@ -442,6 +450,8 @@ enum ovs_action_type { OVS_ACTION_ATTR_SET_TUNNEL, /* Set the encapsulating tunnel ID. */ OVS_ACTION_ATTR_SET_PRIORITY, /* Set skb->priority. */ OVS_ACTION_ATTR_POP_PRIORITY, /* Restore original skb->priority. */ + OVS_ACTION_ATTR_SAMPLE, /* Execute list of actions at given + probability. */ __OVS_ACTION_ATTR_MAX }; diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 97b81442e..b195a7219 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -80,7 +80,6 @@ struct dpif_linux_dp { uint32_t upcall_pid; /* OVS_DP_UPCALL_PID. */ struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */ enum ovs_frag_handling ipv4_frags; /* OVS_DP_ATTR_IPV4_FRAGS. */ - const uint32_t *sampling; /* OVS_DP_ATTR_SAMPLING. */ }; static void dpif_linux_dp_init(struct dpif_linux_dp *); @@ -880,8 +879,7 @@ get_upcall_pid_flow(struct dpif_linux *dpif, const struct nlattr *nla; uint32_t port; - if (!(dpif->listen_mask & - ((1u << DPIF_UC_ACTION) | (1u << DPIF_UC_SAMPLE)))) { + if (!(dpif->listen_mask & (1u << DPIF_UC_ACTION))) { return 0; } @@ -976,35 +974,6 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) return 0; } -static int -dpif_linux_get_sflow_probability(const struct dpif *dpif_, - uint32_t *probability) -{ - struct dpif_linux_dp dp; - struct ofpbuf *buf; - int error; - - error = dpif_linux_dp_get(dpif_, &dp, &buf); - if (!error) { - *probability = dp.sampling ? *dp.sampling : 0; - ofpbuf_delete(buf); - } - return error; -} - -static int -dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability) -{ - struct dpif_linux *dpif = dpif_linux_cast(dpif_); - struct dpif_linux_dp dp; - - dpif_linux_dp_init(&dp); - dp.cmd = OVS_DP_CMD_SET; - dp.dp_ifindex = dpif->dp_ifindex; - dp.sampling = &probability; - return dpif_linux_dp_transact(&dp, NULL, NULL); -} - static int dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, uint32_t queue_id, uint32_t *priority) @@ -1029,10 +998,6 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, /* OVS_PACKET_CMD_ACTION only. */ [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_U64, .optional = true }, - - /* OVS_PACKET_CMD_SAMPLE only. */ - [OVS_PACKET_ATTR_SAMPLE_POOL] = { .type = NL_A_U32, .optional = true }, - [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true }, }; struct ovs_header *ovs_header; @@ -1056,7 +1021,6 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION - : genl->cmd == OVS_PACKET_CMD_SAMPLE ? DPIF_UC_SAMPLE : -1); if (type < 0) { return EINVAL; @@ -1072,14 +1036,6 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, upcall->userdata = (a[OVS_PACKET_ATTR_USERDATA] ? nl_attr_get_u64(a[OVS_PACKET_ATTR_USERDATA]) : 0); - upcall->sample_pool = (a[OVS_PACKET_ATTR_SAMPLE_POOL] - ? nl_attr_get_u32(a[OVS_PACKET_ATTR_SAMPLE_POOL]) - : 0); - if (a[OVS_PACKET_ATTR_ACTIONS]) { - upcall->actions = (void *) nl_attr_get(a[OVS_PACKET_ATTR_ACTIONS]); - upcall->actions_len = nl_attr_get_size(a[OVS_PACKET_ATTR_ACTIONS]); - } - *dp_ifindex = ovs_header->dp_ifindex; return 0; @@ -1199,8 +1155,6 @@ const struct dpif_class dpif_linux_class = { dpif_linux_execute, dpif_linux_recv_get_mask, dpif_linux_recv_set_mask, - dpif_linux_get_sflow_probability, - dpif_linux_set_sflow_probability, dpif_linux_queue_to_priority, dpif_linux_recv, dpif_linux_recv_wait, @@ -1518,7 +1472,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) .max_len = sizeof(struct ovs_dp_stats), .optional = true }, [OVS_DP_ATTR_IPV4_FRAGS] = { .type = NL_A_U32, .optional = true }, - [OVS_DP_ATTR_SAMPLING] = { .type = NL_A_U32, .optional = true }, }; struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)]; @@ -1552,9 +1505,6 @@ dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) if (a[OVS_DP_ATTR_IPV4_FRAGS]) { dp->ipv4_frags = nl_attr_get_u32(a[OVS_DP_ATTR_IPV4_FRAGS]); } - if (a[OVS_DP_ATTR_SAMPLING]) { - dp->sampling = nl_attr_get(a[OVS_DP_ATTR_SAMPLING]); - } return 0; } @@ -1582,10 +1532,6 @@ dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) if (dp->ipv4_frags) { nl_msg_put_u32(buf, OVS_DP_ATTR_IPV4_FRAGS, dp->ipv4_frags); } - - if (dp->sampling) { - nl_msg_put_u32(buf, OVS_DP_ATTR_SAMPLING, *dp->sampling); - } } /* Clears 'dp' to "empty" values. */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 359c80b7f..926464e26 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1379,8 +1379,6 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_execute, dpif_netdev_recv_get_mask, dpif_netdev_recv_set_mask, - NULL, /* get_sflow_probability */ - NULL, /* set_sflow_probability */ NULL, /* queue_to_priority */ dpif_netdev_recv, dpif_netdev_recv_wait, diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index c6c39da51..558b8913d 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -303,25 +303,6 @@ struct dpif_class { * called. */ int (*recv_set_mask)(struct dpif *dpif, int listen_mask); - /* Retrieves 'dpif''s sFlow sampling probability into '*probability'. - * Return value is 0 or a positive errno value. EOPNOTSUPP indicates that - * the datapath does not support sFlow, as does a null pointer. - * - * '*probability' is expressed as the number of packets out of UINT_MAX to - * sample, e.g. probability/UINT_MAX is the probability of sampling a given - * packet. */ - int (*get_sflow_probability)(const struct dpif *dpif, - uint32_t *probability); - - /* Sets 'dpif''s sFlow sampling probability to 'probability'. Return value - * is 0 or a positive errno value. EOPNOTSUPP indicates that the datapath - * does not support sFlow, as does a null pointer. - * - * 'probability' is expressed as the number of packets out of UINT_MAX to - * sample, e.g. probability/UINT_MAX is the probability of sampling a given - * packet. */ - int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability); - /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value for use in the OVS_ACTION_ATTR_SET_PRIORITY action in * '*priority'. */ diff --git a/lib/dpif.c b/lib/dpif.c index ad143c8ed..2d21a9ff7 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -958,7 +958,6 @@ dpif_upcall_type_to_string(enum dpif_upcall_type type) switch (type) { case DPIF_UC_MISS: return "miss"; case DPIF_UC_ACTION: return "action"; - case DPIF_UC_SAMPLE: return "sample"; case DPIF_N_UC_TYPES: default: return ""; } } @@ -967,8 +966,7 @@ static bool OVS_UNUSED is_valid_listen_mask(int listen_mask) { return !(listen_mask & ~((1u << DPIF_UC_MISS) | - (1u << DPIF_UC_ACTION) | - (1u << DPIF_UC_SAMPLE))); + (1u << DPIF_UC_ACTION))); } /* Retrieves 'dpif''s "listen mask" into '*listen_mask'. A 1-bit of value 2**X @@ -1003,41 +1001,6 @@ dpif_recv_set_mask(struct dpif *dpif, int listen_mask) return error; } -/* Retrieve the sFlow sampling probability. '*probability' is expressed as the - * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is - * the probability of sampling a given packet. - * - * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP - * indicates that 'dpif' does not support sFlow sampling. */ -int -dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability) -{ - int error = (dpif->dpif_class->get_sflow_probability - ? dpif->dpif_class->get_sflow_probability(dpif, probability) - : EOPNOTSUPP); - if (error) { - *probability = 0; - } - log_operation(dpif, "get_sflow_probability", error); - return error; -} - -/* Set the sFlow sampling probability. 'probability' is expressed as the - * number of packets out of UINT_MAX to sample, e.g. probability/UINT_MAX is - * the probability of sampling a given packet. - * - * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP - * indicates that 'dpif' does not support sFlow sampling. */ -int -dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability) -{ - int error = (dpif->dpif_class->set_sflow_probability - ? dpif->dpif_class->set_sflow_probability(dpif, probability) - : EOPNOTSUPP); - log_operation(dpif, "set_sflow_probability", error); - return error; -} - /* Polls for an upcall from 'dpif'. If successful, stores the upcall into * '*upcall'. Only upcalls of the types selected with dpif_recv_set_mask() * member function will ordinarily be received (but if a message type is diff --git a/lib/dpif.h b/lib/dpif.h index c01010db9..b572d0fb2 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -155,7 +155,6 @@ int dpif_execute(struct dpif *, enum dpif_upcall_type { DPIF_UC_MISS, /* Miss in flow table. */ DPIF_UC_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ - DPIF_UC_SAMPLE, /* Packet sampling. */ DPIF_N_UC_TYPES }; @@ -177,17 +176,10 @@ struct dpif_upcall { /* DPIF_UC_ACTION only. */ uint64_t userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ - - /* DPIF_UC_SAMPLE only. */ - uint32_t sample_pool; /* # of sampling candidate packets so far. */ - struct nlattr *actions; /* Associated flow actions. */ - size_t actions_len; }; int dpif_recv_get_mask(const struct dpif *, int *listen_mask); int dpif_recv_set_mask(struct dpif *, int listen_mask); -int dpif_get_sflow_probability(const struct dpif *, uint32_t *probability); -int dpif_set_sflow_probability(struct dpif *, uint32_t probability); int dpif_recv(struct dpif *, struct dpif_upcall *); void dpif_recv_purge(struct dpif *); void dpif_recv_wait(struct dpif *); diff --git a/lib/odp-util.c b/lib/odp-util.c index 3d9439815..d97d30c03 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -62,6 +62,7 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_SET_PRIORITY: return 4; case OVS_ACTION_ATTR_POP_PRIORITY: return 0; + case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: return -1; @@ -89,13 +90,50 @@ format_generic_odp_action(struct ds *ds, const struct nlattr *a) } } +static void +format_odp_sample_action(struct ds *ds, const struct nlattr *attr) +{ + static const struct nl_policy ovs_sample_policy[] = { + [OVS_SAMPLE_ATTR_PROBABILITY] = { .type = NL_A_U32 }, + [OVS_SAMPLE_ATTR_ACTIONS] = { .type = NL_A_NESTED } + }; + struct nlattr *a[ARRAY_SIZE(ovs_sample_policy)]; + struct ofpbuf buf; + double percentage; + const struct nlattr *nla_acts; + int len; + + ds_put_cstr(ds, "sample"); + + ofpbuf_use_const(&buf, nl_attr_get(attr), nl_attr_get_size(attr)); + if (!nl_policy_parse(&buf, 0, ovs_sample_policy, a, + ARRAY_SIZE(ovs_sample_policy))) { + ds_put_cstr(ds, "(error)"); + return; + } + + percentage = (100.0 * nl_attr_get_u32(a[OVS_SAMPLE_ATTR_PROBABILITY])) / + UINT32_MAX; + + ds_put_format(ds, "(sample=%.1f%%,", percentage); + + ds_put_cstr(ds, "actions("); + nla_acts = nl_attr_get(a[OVS_SAMPLE_ATTR_ACTIONS]); + len = nl_attr_get_size(a[OVS_SAMPLE_ATTR_ACTIONS]); + format_odp_actions(ds, nla_acts, len); + ds_put_format(ds, "))"); +} + void format_odp_action(struct ds *ds, const struct nlattr *a) { const uint8_t *eth; ovs_be32 ip; + struct user_action_cookie cookie; + uint64_t data; - if (nl_attr_get_size(a) != odp_action_len(nl_attr_type(a))) { + if (nl_attr_get_size(a) != odp_action_len(nl_attr_type(a)) && + nl_attr_type(a) != OVS_ACTION_ATTR_SAMPLE) { ds_put_format(ds, "bad length %zu, expected %d for: ", nl_attr_get_size(a), odp_action_len(nl_attr_type(a))); format_generic_odp_action(ds, a); @@ -107,7 +145,21 @@ format_odp_action(struct ds *ds, const struct nlattr *a) ds_put_format(ds, "%"PRIu16, nl_attr_get_u32(a)); break; case OVS_ACTION_ATTR_USERSPACE: - ds_put_format(ds, "userspace(%"PRIu64")", nl_attr_get_u64(a)); + data = nl_attr_get_u64(a); + memcpy(&cookie, &data, sizeof(cookie)); + + if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { + ds_put_format(ds, "userspace(controller,length=%"PRIu32")", + cookie.data); + } else if (cookie.type == USER_ACTION_COOKIE_SFLOW) { + ds_put_format(ds, "userspace(sFlow,n_output=%"PRIu8"," + "vid=%"PRIu16",pcp=%"PRIu8",ifindex=%"PRIu32")", + cookie.n_output, vlan_tci_to_vid(cookie.vlan_tci), + vlan_tci_to_pcp(cookie.vlan_tci), cookie.data); + } else { + ds_put_format(ds, "userspace(unknown,data=0x%"PRIx64")", + nl_attr_get_u64(a)); + } break; case OVS_ACTION_ATTR_SET_TUNNEL: ds_put_format(ds, "set_tunnel(%#"PRIx64")", @@ -152,6 +204,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a) case OVS_ACTION_ATTR_POP_PRIORITY: ds_put_cstr(ds, "pop_priority"); break; + case OVS_ACTION_ATTR_SAMPLE: + format_odp_sample_action(ds, a); + break; default: format_generic_odp_action(ds, a); break; diff --git a/lib/odp-util.h b/lib/odp-util.h index 437972925..0e4bc0bd0 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -99,4 +99,22 @@ int odp_flow_key_from_string(const char *s, struct ofpbuf *); void odp_flow_key_from_flow(struct ofpbuf *, const struct flow *); int odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *); +enum user_action_cookie_type { + USER_ACTION_COOKIE_UNSPEC, + USER_ACTION_COOKIE_CONTROLLER, /* Packet for controller. */ + USER_ACTION_COOKIE_SFLOW, /* Packet for sFlow sampling. */ +}; + +/* user_action_cookie is passed as argument to OVS_ACTION_ATTR_USERSPACE. + * Since is it passed to kernel as u64, its size has to be 8 bytes. */ +struct user_action_cookie { + uint8_t type; /* enum user_action_cookie_type. */ + uint8_t n_output; /* No of output ports. used by sflow. */ + ovs_be16 vlan_tci; /* Used by sFlow */ + uint32_t data; /* Data is len for OFPP_CONTROLLER action. + For sFlow it is port_ifindex. */ +}; + +BUILD_ASSERT_DECL(sizeof(struct user_action_cookie) == 8); + #endif /* odp-util.h */ diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index 68eb804d6..d17b0be1c 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -46,7 +46,6 @@ struct dpif_sflow_port { }; struct dpif_sflow { - struct ofproto *ofproto; struct collectors *collectors; SFLAgent *sflow_agent; struct ofproto_sflow_options *options; @@ -54,6 +53,7 @@ struct dpif_sflow { time_t next_tick; size_t n_flood, n_all; struct hmap ports; /* Contains "struct dpif_sflow_port"s. */ + uint32_t probability; }; static void dpif_sflow_del_port__(struct dpif_sflow *, @@ -270,7 +270,7 @@ dpif_sflow_clear(struct dpif_sflow *ds) ds->options = NULL; /* Turn off sampling to save CPU cycles. */ - dpif_set_sflow_probability(ds->dpif, 0); + ds->probability = 0; } bool @@ -288,9 +288,19 @@ dpif_sflow_create(struct dpif *dpif) ds->dpif = dpif; ds->next_tick = time_now() + 1; hmap_init(&ds->ports); + ds->probability = 0; return ds; } +/* 32-bit fraction of packets to sample with. A value of 0 samples no packets, + * a value of %UINT32_MAX samples all packets and intermediate values sample + * intermediate fractions of packets. */ +uint32_t +dpif_sflow_get_probability(const struct dpif_sflow *ds) +{ + return ds->probability; +} + void dpif_sflow_destroy(struct dpif_sflow *ds) { @@ -457,8 +467,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds, sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff); /* Set the sampling_rate down in the datapath. */ - dpif_set_sflow_probability(ds->dpif, - MAX(1, UINT32_MAX / options->sampling_rate)); + ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate); /* Add samplers and pollers for the currently known ports. */ HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) { @@ -467,7 +476,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds, } } -static int +int dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds, uint16_t odp_port) { @@ -476,24 +485,34 @@ dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds, } void -dpif_sflow_received(struct dpif_sflow *ds, const struct dpif_upcall *upcall, - const struct flow *flow) +dpif_sflow_received(struct dpif_sflow *ds, struct ofpbuf *packet, + const struct flow *flow, + const struct user_action_cookie *cookie) { SFL_FLOW_SAMPLE_TYPE fs; SFLFlow_sample_element hdrElem; SFLSampled_header *header; SFLFlow_sample_element switchElem; SFLSampler *sampler; - unsigned int left; - struct nlattr *a; - size_t n_outputs; + struct dpif_sflow_port *in_dsp; + struct netdev_stats stats; + int error; /* Build a flow sample */ memset(&fs, 0, sizeof fs); - fs.input = dpif_sflow_odp_port_to_ifindex(ds, - ofp_port_to_odp_port(flow->in_port)); - fs.output = 0; /* Filled in correctly below. */ - fs.sample_pool = upcall->sample_pool; + + in_dsp = dpif_sflow_find_port(ds, ofp_port_to_odp_port(flow->in_port)); + if (!in_dsp) { + return; + } + fs.input = SFL_DS_INDEX(in_dsp->dsi); + + error = netdev_get_stats(in_dsp->netdev, &stats); + if (error) { + VLOG_WARN_RL(&rl, "netdev get-stats error %s", strerror(error)); + return; + } + fs.sample_pool = stats.rx_packets; /* We are going to give it to the sampler that represents this input port. * By implementing "ingress-only" sampling like this we ensure that we @@ -512,54 +531,34 @@ dpif_sflow_received(struct dpif_sflow *ds, const struct dpif_upcall *upcall, header->header_protocol = SFLHEADER_ETHERNET_ISO8023; /* The frame_length should include the Ethernet FCS (4 bytes), but it has already been stripped, so we need to add 4 here. */ - header->frame_length = upcall->packet->size + 4; + header->frame_length = packet->size + 4; /* Ethernet FCS stripped off. */ header->stripped = 4; - header->header_length = MIN(upcall->packet->size, + header->header_length = MIN(packet->size, sampler->sFlowFsMaximumHeaderSize); - header->header_bytes = upcall->packet->data; + header->header_bytes = packet->data; /* Add extended switch element. */ memset(&switchElem, 0, sizeof(switchElem)); switchElem.tag = SFLFLOW_EX_SWITCH; switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlan_tci); switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlan_tci); - /* Initialize the output VLAN and priority to be the same as the input, - but these fields can be overriden below if affected by an action. */ - switchElem.flowType.sw.dst_vlan = switchElem.flowType.sw.src_vlan; - switchElem.flowType.sw.dst_priority = switchElem.flowType.sw.src_priority; - /* Figure out the output ports. */ - n_outputs = 0; - NL_ATTR_FOR_EACH_UNSAFE (a, left, upcall->actions, upcall->actions_len) { - ovs_be16 tci; - - switch (nl_attr_type(a)) { - case OVS_ACTION_ATTR_OUTPUT: - fs.output = dpif_sflow_odp_port_to_ifindex(ds, nl_attr_get_u32(a)); - n_outputs++; - break; - - case OVS_ACTION_ATTR_PUSH_VLAN: - tci = nl_attr_get_be16(a); - switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(tci); - switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(tci); - break; - - default: - break; - } - } + /* Retrieve data from user_action_cookie. */ + switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(cookie->vlan_tci); + switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(cookie->vlan_tci); /* Set output port, as defined by http://www.sflow.org/sflow_version_5.txt (search for "Input/output port information"). */ - if (!n_outputs) { + if (!cookie->n_output) { /* This value indicates that the packet was dropped for an unknown * reason. */ fs.output = 0x40000000 | 256; - } else if (n_outputs > 1 || !fs.output) { + } else if (cookie->n_output > 1 || !cookie->data) { /* Setting the high bit means "multiple output ports". */ - fs.output = 0x80000000 | n_outputs; + fs.output = 0x80000000 | cookie->n_output; + } else { + fs.output = cookie->data; } /* Submit the flow sample to be encoded into the next datagram. */ diff --git a/ofproto/ofproto-dpif-sflow.h b/ofproto/ofproto-dpif-sflow.h index c7c8b5af3..439f41045 100644 --- a/ofproto/ofproto-dpif-sflow.h +++ b/ofproto/ofproto-dpif-sflow.h @@ -20,6 +20,7 @@ #include #include "svec.h" +#include "lib/odp-util.h" struct dpif; struct dpif_upcall; @@ -27,6 +28,8 @@ struct flow; struct ofproto_sflow_options; struct dpif_sflow *dpif_sflow_create(struct dpif *); +uint32_t dpif_sflow_get_probability(const struct dpif_sflow *); + void dpif_sflow_destroy(struct dpif_sflow *); void dpif_sflow_set_options(struct dpif_sflow *, const struct ofproto_sflow_options *); @@ -40,7 +43,11 @@ void dpif_sflow_del_port(struct dpif_sflow *, uint16_t ovs_port); void dpif_sflow_run(struct dpif_sflow *); void dpif_sflow_wait(struct dpif_sflow *); -void dpif_sflow_received(struct dpif_sflow *, const struct dpif_upcall *, - const struct flow *); +void dpif_sflow_received(struct dpif_sflow *, + struct ofpbuf *, + const struct flow *, + const struct user_action_cookie *); + +int dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *, uint16_t); #endif /* ofproto/ofproto-dpif-sflow.h */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index ecad4890b..f9738e744 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -205,6 +205,9 @@ struct action_xlate_ctx { struct flow base_flow; /* Flow at the last commit. */ uint32_t base_priority; /* Priority at the last commit. */ uint8_t table_id; /* OpenFlow table ID where flow was found. */ + uint32_t sflow_n_outputs; /* Number of output ports. */ + uint16_t sflow_odp_port; /* Output port for composing sFlow action. */ + uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */ }; static void action_xlate_ctx_init(struct action_xlate_ctx *, @@ -401,7 +404,9 @@ static int expire(struct ofproto_dpif *); /* Utilities. */ static int send_packet(struct ofproto_dpif *, uint32_t odp_port, const struct ofpbuf *packet); - +static size_t +compose_sflow_action(const struct ofproto_dpif *, struct ofpbuf *odp_actions, + const struct flow *, uint32_t odp_port); /* Global variables. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -471,8 +476,7 @@ construct(struct ofproto *ofproto_, int *n_tablesp) error = dpif_recv_set_mask(ofproto->dpif, ((1u << DPIF_UC_MISS) | - (1u << DPIF_UC_ACTION) | - (1u << DPIF_UC_SAMPLE))); + (1u << DPIF_UC_ACTION))); if (error) { VLOG_ERR("failed to listen on datapath %s: %s", name, strerror(error)); dpif_close(ofproto->dpif); @@ -810,6 +814,7 @@ set_sflow(struct ofproto *ofproto_, { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct dpif_sflow *ds = ofproto->sflow; + if (sflow_options) { if (!ds) { struct ofport_dpif *ofport; @@ -819,11 +824,15 @@ set_sflow(struct ofproto *ofproto_, dpif_sflow_add_port(ds, ofport->odp_port, netdev_get_name(ofport->up.netdev)); } + ofproto->need_revalidate = true; } dpif_sflow_set_options(ds, sflow_options); } else { - dpif_sflow_destroy(ds); - ofproto->sflow = NULL; + if (ds) { + dpif_sflow_destroy(ds); + ofproto->need_revalidate = true; + ofproto->sflow = NULL; + } } return 0; } @@ -1713,12 +1722,15 @@ send_packet_in(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall, const struct flow *flow, bool clone) { struct ofputil_packet_in pin; + struct user_action_cookie cookie; pin.packet = upcall->packet; pin.in_port = flow->in_port; pin.reason = upcall->type == DPIF_UC_MISS ? OFPR_NO_MATCH : OFPR_ACTION; pin.buffer_id = 0; /* not yet known */ - pin.send_len = upcall->userdata; + + memcpy(&cookie, &upcall->userdata, sizeof(cookie)); + pin.send_len = cookie.data; connmgr_send_packet_in(ofproto->up.connmgr, &pin, flow, clone ? NULL : upcall->packet); } @@ -1822,23 +1834,36 @@ handle_miss_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) } static void -handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +handle_userspace_upcall(struct ofproto_dpif *ofproto, + struct dpif_upcall *upcall) { struct flow flow; + struct user_action_cookie cookie; - switch (upcall->type) { - case DPIF_UC_ACTION: + memcpy(&cookie, &upcall->userdata, sizeof(cookie)); + + if (cookie.type == USER_ACTION_COOKIE_SFLOW) { + if (ofproto->sflow) { + odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); + dpif_sflow_received(ofproto->sflow, upcall->packet, &flow, &cookie); + } + ofpbuf_delete(upcall->packet); + + } else if (cookie.type == USER_ACTION_COOKIE_CONTROLLER) { COVERAGE_INC(ofproto_dpif_ctlr_action); odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); send_packet_in(ofproto, upcall, &flow, false); - break; + } else { + VLOG_WARN_RL(&rl, "invalid user cookie : 0x%"PRIx64, upcall->userdata); + } +} - case DPIF_UC_SAMPLE: - if (ofproto->sflow) { - odp_flow_key_to_flow(upcall->key, upcall->key_len, &flow); - dpif_sflow_received(ofproto->sflow, upcall, &flow); - } - ofpbuf_delete(upcall->packet); +static void +handle_upcall(struct ofproto_dpif *ofproto, struct dpif_upcall *upcall) +{ + switch (upcall->type) { + case DPIF_UC_ACTION: + handle_userspace_upcall(ofproto, upcall); break; case DPIF_UC_MISS: @@ -2174,39 +2199,43 @@ execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, const struct nlattr *odp_actions, size_t actions_len, struct ofpbuf *packet) { + struct odputil_keybuf keybuf; + struct ofpbuf key; + int error; + if (actions_len == NLA_ALIGN(NLA_HDRLEN + sizeof(uint64_t)) && odp_actions->nla_type == OVS_ACTION_ATTR_USERSPACE) { - /* As an optimization, avoid a round-trip from userspace to kernel to - * userspace. This also avoids possibly filling up kernel packet - * buffers along the way. */ + const struct user_action_cookie *cookie; struct dpif_upcall upcall; - upcall.type = DPIF_UC_ACTION; - upcall.packet = packet; - upcall.key = NULL; - upcall.key_len = 0; - upcall.userdata = nl_attr_get_u64(odp_actions); - upcall.sample_pool = 0; - upcall.actions = NULL; - upcall.actions_len = 0; + cookie = nl_attr_get_unspec(odp_actions, sizeof(*cookie)); + if (cookie->type == USER_ACTION_COOKIE_CONTROLLER) { + /* As an optimization, avoid a round-trip from userspace to kernel + * to userspace. This also avoids possibly filling up kernel packet + * buffers along the way. + * This optimization does not work in case of sFlow is turned ON. + * Since first action would be sFlow SAMPLE action followed by + * Controller action. */ - send_packet_in(ofproto, &upcall, flow, false); + upcall.type = DPIF_UC_ACTION; + upcall.packet = packet; + upcall.key = NULL; + upcall.key_len = 0; + upcall.userdata = nl_attr_get_u64(odp_actions); - return true; - } else { - struct odputil_keybuf keybuf; - struct ofpbuf key; - int error; - - ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, flow); - - error = dpif_execute(ofproto->dpif, key.data, key.size, - odp_actions, actions_len, packet); - - ofpbuf_delete(packet); - return !error; + send_packet_in(ofproto, &upcall, flow, false); + return true; + } } + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); + + error = dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions, actions_len, packet); + + ofpbuf_delete(packet); + return !error; } /* Executes the actions indicated by 'facet' on 'packet' and credits 'facet''s @@ -2910,6 +2939,8 @@ send_packet(struct ofproto_dpif *ofproto, uint32_t odp_port, odp_flow_key_from_flow(&key, &flow); ofpbuf_init(&odp_actions, 32); + compose_sflow_action(ofproto, &odp_actions, &flow, odp_port); + nl_msg_put_u32(&odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); error = dpif_execute(ofproto->dpif, key.data, key.size, @@ -2930,6 +2961,95 @@ static void do_xlate_actions(const union ofp_action *in, size_t n_in, struct action_xlate_ctx *ctx); static void xlate_normal(struct action_xlate_ctx *); +/* Compose SAMPLE action for sFlow. */ +static size_t +compose_sflow_action(const struct ofproto_dpif *ofproto, + struct ofpbuf *odp_actions, + const struct flow *flow, + uint32_t odp_port) +{ + uint32_t port_ifindex; + uint32_t probability; + struct user_action_cookie *cookie; + size_t sample_offset, actions_offset; + int user_cookie_offset, n_output; + + if (!ofproto->sflow || flow->in_port == OFPP_NONE) { + return 0; + } + + if (odp_port == OVSP_NONE) { + port_ifindex = 0; + n_output = 0; + } else { + port_ifindex = dpif_sflow_odp_port_to_ifindex(ofproto->sflow, odp_port); + n_output = 1; + } + + sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE); + + /* Number of packets out of UINT_MAX to sample. */ + probability = dpif_sflow_get_probability(ofproto->sflow); + nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability); + + actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS); + + cookie = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_USERSPACE, + sizeof(*cookie)); + cookie->type = USER_ACTION_COOKIE_SFLOW; + cookie->data = port_ifindex; + cookie->n_output = n_output; + cookie->vlan_tci = 0; + user_cookie_offset = (char *) cookie - (char *) odp_actions->data; + + nl_msg_end_nested(odp_actions, actions_offset); + nl_msg_end_nested(odp_actions, sample_offset); + return user_cookie_offset; +} + +/* SAMPLE action must be first action in any given list of actions. + * At this point we do not have all information required to build it. So try to + * build sample action as complete as possible. */ +static void +add_sflow_action(struct action_xlate_ctx *ctx) +{ + ctx->user_cookie_offset = compose_sflow_action(ctx->ofproto, + ctx->odp_actions, + &ctx->flow, OVSP_NONE); + ctx->sflow_odp_port = 0; + ctx->sflow_n_outputs = 0; +} + +/* Fix SAMPLE action according to data collected while composing ODP actions. + * We need to fix SAMPLE actions OVS_SAMPLE_ATTR_ACTIONS attribute, i.e. nested + * USERSPACE action's user-cookie which is required for sflow. */ +static void +fix_sflow_action(struct action_xlate_ctx *ctx) +{ + const struct flow *base = &ctx->base_flow; + struct user_action_cookie *cookie; + + if (!ctx->user_cookie_offset) { + return; + } + + cookie = ofpbuf_at(ctx->odp_actions, ctx->user_cookie_offset, + sizeof(*cookie)); + assert(cookie != NULL); + assert(cookie->type == USER_ACTION_COOKIE_SFLOW); + + if (ctx->sflow_n_outputs) { + cookie->data = dpif_sflow_odp_port_to_ifindex(ctx->ofproto->sflow, + ctx->sflow_odp_port); + } + if (ctx->sflow_n_outputs >= 255) { + cookie->n_output = 255; + } else { + cookie->n_output = ctx->sflow_n_outputs; + } + cookie->vlan_tci = base->vlan_tci; +} + static void commit_vlan_tci(struct action_xlate_ctx *ctx, ovs_be16 vlan_tci) { @@ -3012,6 +3132,14 @@ commit_odp_actions(struct action_xlate_ctx *ctx) } } +static void +compose_output_action(struct action_xlate_ctx *ctx, uint16_t odp_port) +{ + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); + ctx->sflow_odp_port = odp_port; + ctx->sflow_n_outputs++; +} + static void add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) { @@ -3032,7 +3160,7 @@ add_output_action(struct action_xlate_ctx *ctx, uint16_t ofp_port) } commit_odp_actions(ctx); - nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, odp_port); + compose_output_action(ctx, odp_port); ctx->nf_output_iface = ofp_port; } @@ -3113,14 +3241,27 @@ flood_packets(struct action_xlate_ctx *ctx, ovs_be32 mask) HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) { uint16_t ofp_port = ofport->up.ofp_port; if (ofp_port != ctx->flow.in_port && !(ofport->up.opp.config & mask)) { - nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, - ofport->odp_port); + compose_output_action(ctx, ofport->odp_port); } } ctx->nf_output_iface = NF_OUT_FLOOD; } +static void +compose_controller_action(struct ofpbuf *odp_actions, int len) +{ + struct user_action_cookie cookie; + + cookie.type = USER_ACTION_COOKIE_CONTROLLER; + cookie.data = len; + cookie.n_output = 0; + cookie.vlan_tci = 0; + + nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_USERSPACE, + &cookie, sizeof(cookie)); +} + static void xlate_output_action__(struct action_xlate_ctx *ctx, uint16_t port, uint16_t max_len) @@ -3147,7 +3288,7 @@ xlate_output_action__(struct action_xlate_ctx *ctx, break; case OFPP_CONTROLLER: commit_odp_actions(ctx); - nl_msg_put_u64(ctx->odp_actions, OVS_ACTION_ATTR_USERSPACE, max_len); + compose_controller_action(ctx->odp_actions, max_len); break; case OFPP_LOCAL: add_output_action(ctx, OFPP_LOCAL); @@ -3515,7 +3656,9 @@ xlate_actions(struct action_xlate_ctx *ctx, if (process_special(ctx->ofproto, &ctx->flow, ctx->packet)) { ctx->may_set_up_flow = false; } else { + add_sflow_action(ctx); do_xlate_actions(in, n_in, ctx); + fix_sflow_action(ctx); } /* Check with in-band control to see if we're allowed to set up this @@ -3847,8 +3990,7 @@ compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan, if (dst->vid != initial_vid) { continue; } - nl_msg_put_u32(ctx->odp_actions, - OVS_ACTION_ATTR_OUTPUT, dst->port->odp_port); + compose_output_action(ctx, dst->port->odp_port); } /* Then output the rest. */ @@ -3869,8 +4011,7 @@ compose_actions(struct action_xlate_ctx *ctx, uint16_t vlan, cur_vid = dst->vid; } - nl_msg_put_u32(ctx->odp_actions, - OVS_ACTION_ATTR_OUTPUT, dst->port->odp_port); + compose_output_action(ctx, dst->port->odp_port); } dst_set_free(&set);