2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 09:58:01 +00:00
ovs/ofproto/ofproto-dpif-sflow.c

1444 lines
48 KiB
C
Raw Permalink Normal View History

/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
* Copyright (c) 2009 InMon Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "ofproto-dpif-sflow.h"
#include <inttypes.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdlib.h>
#include "collectors.h"
#include "compiler.h"
#include "dpif.h"
#include "hash.h"
#include "openvswitch/hmap.h"
#include "netdev.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "ofproto.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
#include "ovs-router.h"
#include "route-table.h"
#include "sflow_api.h"
#include "socket-util.h"
#include "timeval.h"
#include "openvswitch/vlog.h"
#include "lib/odp-util.h"
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
#include "lib/unaligned.h"
#include "ofproto-provider.h"
#include "lacp.h"
VLOG_DEFINE_THIS_MODULE(sflow);
static struct ovs_mutex mutex;
/* This global var is used to determine which sFlow
sub-agent should send the datapath counters. */
#define SFLOW_GC_SUBID_UNCLAIMED (uint32_t)-1
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/*
* The enum dpif_sflow_tunnel_type is to declare the types supported
*/
enum dpif_sflow_tunnel_type {
DPIF_SFLOW_TUNNEL_UNKNOWN = 0,
DPIF_SFLOW_TUNNEL_VXLAN,
DPIF_SFLOW_TUNNEL_GRE,
DPIF_SFLOW_TUNNEL_GENEVE
};
struct dpif_sflow_port {
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
struct ofport *ofport; /* To retrieve port stats. */
odp_port_t odp_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
enum dpif_sflow_tunnel_type tunnel_type;
};
struct dpif_sflow {
struct collectors *collectors;
SFLAgent *sflow_agent;
struct ofproto_sflow_options *options;
time_t next_tick;
size_t n_flood, n_all;
struct hmap ports; /* Contains "struct dpif_sflow_port"s. */
uint32_t probability;
struct ovs_refcount ref_cnt;
};
static void dpif_sflow_del_port__(struct dpif_sflow *,
struct dpif_sflow_port *);
#define RECEIVER_INDEX 1
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
static bool
ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
const struct ofproto_sflow_options *b)
{
return (sset_equals(&a->targets, &b->targets)
&& a->sampling_rate == b->sampling_rate
&& a->polling_interval == b->polling_interval
&& a->header_len == b->header_len
&& a->sub_id == b->sub_id
&& nullable_string_is_equal(a->agent_device, b->agent_device)
&& nullable_string_is_equal(a->control_ip, b->control_ip));
}
static struct ofproto_sflow_options *
ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
{
struct ofproto_sflow_options *new = xmemdup(old, sizeof *old);
sset_clone(&new->targets, &old->targets);
new->agent_device = nullable_xstrdup(old->agent_device);
new->control_ip = nullable_xstrdup(old->control_ip);
return new;
}
static void
ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
{
if (options) {
sset_destroy(&options->targets);
free(options->agent_device);
free(options->control_ip);
free(options);
}
}
/* sFlow library callback to allocate memory. */
static void *
sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
size_t bytes)
{
return xzalloc(bytes);
}
/* sFlow library callback to free memory. */
static int
sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
void *obj)
{
free(obj);
return 0;
}
/* sFlow library callback to report error. */
static void
sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
char *msg)
{
VLOG_WARN("sFlow agent error: %s", msg);
}
/* sFlow library callback to send datagram. */
static void
sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
uint32_t pktLen)
{
struct dpif_sflow *ds = ds_;
collectors_send(ds->collectors, pkt, pktLen);
}
static struct dpif_sflow_port *
dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
OVS_REQUIRES(mutex)
{
struct dpif_sflow_port *dsp;
HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port),
&ds->ports) {
if (dsp->odp_port == odp_port) {
return dsp;
}
}
return NULL;
}
/* Call to get the datapath stats. Modeled after the dpctl utility.
*
* It might be more efficient for this module to be given a handle it can use
* to get these stats more efficiently, but this is only going to be called
* once every 20-30 seconds. Return number of datapaths found (normally expect
* 1). */
static int
sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
struct dpif_dp_stats *dp_totals)
{
struct sset types;
const char *type;
int count = 0;
memset(dp_totals, 0, sizeof *dp_totals);
sset_init(&types);
dp_enumerate_types(&types);
SSET_FOR_EACH (type, &types) {
struct sset names;
const char *name;
sset_init(&names);
if (dp_enumerate_names(type, &names) == 0) {
SSET_FOR_EACH (name, &names) {
struct dpif *dpif;
if (dpif_open(name, type, &dpif) == 0) {
struct dpif_dp_stats dp_stats;
if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
count++;
dp_totals->n_hit += dp_stats.n_hit;
dp_totals->n_missed += dp_stats.n_missed;
dp_totals->n_lost += dp_stats.n_lost;
dp_totals->n_flows += dp_stats.n_flows;
dp_totals->n_mask_hit += dp_stats.n_mask_hit;
dp_totals->n_masks += dp_stats.n_masks;
}
dpif_close(dpif);
}
}
sset_destroy(&names);
}
}
sset_destroy(&types);
return count;
}
/* If there are multiple bridges defined then we need some
minimal artibration to decide which one should send the
global counters. This function allows each sub-agent to
ask if he should do it or not. */
static bool
sflow_global_counters_subid_test(uint32_t subid)
OVS_REQUIRES(mutex)
{
if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
/* The role is up for grabs. */
sflow_global_counters_subid = subid;
}
return (sflow_global_counters_subid == subid);
}
static void
sflow_global_counters_subid_clear(uint32_t subid)
OVS_REQUIRES(mutex)
{
if (sflow_global_counters_subid == subid) {
/* The sub-agent that was sending global counters
is going away, so reset to allow another
to take over. */
sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
}
}
static void
sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
OVS_REQUIRES(mutex)
{
struct dpif_sflow *ds = ds_;
SFLCounters_sample_element dp_elem, res_elem;
struct dpif_dp_stats dp_totals;
struct rusage usage;
if (!sflow_global_counters_subid_test(poller->agent->subId)) {
/* Another sub-agent is currently responsible for this. */
return;
}
/* datapath stats */
if (sflow_get_dp_stats(ds, &dp_totals)) {
dp_elem.tag = SFLCOUNTERS_OVSDP;
dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
SFLADD_ELEMENT(cs, &dp_elem);
}
/* resource usage */
getrusage(RUSAGE_SELF, &usage);
res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
res_elem.counterBlock.appResources.user_time
= timeval_to_msec(&usage.ru_utime);
res_elem.counterBlock.appResources.system_time
= timeval_to_msec(&usage.ru_stime);
res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
SFLADD_ELEMENT(cs, &res_elem);
sfl_poller_writeCountersSample(poller, cs);
}
static void
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
OVS_REQUIRES(mutex)
{
struct dpif_sflow *ds = ds_;
SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
SFLCounters_sample_element eth_elem;
enum netdev_features current;
struct dpif_sflow_port *dsp;
SFLIf_counters *counters;
SFLEthernet_counters* eth_counters;
struct netdev_stats stats;
enum netdev_flags flags;
struct lacp_member_stats lacp_stats;
netdev: Add netdev_get_speed() to netdev API. Currently, the netdev's speed is being calculated by taking the link's feature bits (using netdev_get_features()) and transforming them into bps. This mechanism can be both inaccurate and difficult to maintain, mainly because we currently use the feature bits supported by OpenFlow which would have to be extended to support all new feature bits of all netdev implementations while keeping the OpenFlow API intact. In order to expose the link speed accurately for all current and future hardware, add a new netdev API call that allows the implementations to provide the current and maximum link speeds in Mbps. Internally, the logic to get the maximum supported speed still relies on feature bits so it might still get out of sync in the future. However, the maximum configurable speed is not used as much as the current speed and these feature bits are not exposed through the netdev interface so it should be easier to add more. Use this new function instead of netdev_get_features() where the link speed is needed. As a consequence of this patch, link speeds of cards is properly reported (internally in OVSDB) even if not supported by OpenFlow. A test verifies this behavior using a tap device. Also, in order to avoid using the old, this patch adds a checkpatch.py warning if the old API is used. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137567 Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Adrian Moreno <amorenoz@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-07-17 10:08:11 +02:00
uint32_t curr_speed;
const char *ifName;
dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
if (!dsp) {
return;
}
elem.tag = SFLCOUNTERS_GENERIC;
counters = &elem.counterBlock.generic;
counters->ifIndex = SFL_DS_INDEX(poller->dsi);
counters->ifType = 6;
if (!netdev_get_features(dsp->ofport->netdev, &current, NULL, NULL, NULL)) {
/* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
counters->ifDirection = (netdev_features_is_full_duplex(current)
? 1 : 2);
} else {
counters->ifDirection = 0;
}
netdev: Add netdev_get_speed() to netdev API. Currently, the netdev's speed is being calculated by taking the link's feature bits (using netdev_get_features()) and transforming them into bps. This mechanism can be both inaccurate and difficult to maintain, mainly because we currently use the feature bits supported by OpenFlow which would have to be extended to support all new feature bits of all netdev implementations while keeping the OpenFlow API intact. In order to expose the link speed accurately for all current and future hardware, add a new netdev API call that allows the implementations to provide the current and maximum link speeds in Mbps. Internally, the logic to get the maximum supported speed still relies on feature bits so it might still get out of sync in the future. However, the maximum configurable speed is not used as much as the current speed and these feature bits are not exposed through the netdev interface so it should be easier to add more. Use this new function instead of netdev_get_features() where the link speed is needed. As a consequence of this patch, link speeds of cards is properly reported (internally in OVSDB) even if not supported by OpenFlow. A test verifies this behavior using a tap device. Also, in order to avoid using the old, this patch adds a checkpatch.py warning if the old API is used. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137567 Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Adrian Moreno <amorenoz@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2023-07-17 10:08:11 +02:00
netdev_get_speed(dsp->ofport->netdev, &curr_speed, NULL);
if (curr_speed) {
counters->ifSpeed = curr_speed * 1000000;
} else {
counters->ifSpeed = 100000000;
}
if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
counters->ifStatus = 1; /* ifAdminStatus up. */
if (netdev_get_carrier(dsp->ofport->netdev)) {
counters->ifStatus |= 2; /* ifOperStatus us. */
}
} else {
counters->ifStatus = 0; /* Down. */
}
/* XXX
1. Is the multicast counter filled in?
2. Does the multicast counter include broadcasts?
3. Does the rx_packets counter include multicasts/broadcasts?
*/
ofproto_port_get_stats(dsp->ofport, &stats);
counters->ifInOctets = stats.rx_bytes;
counters->ifInUcastPkts = stats.rx_packets;
counters->ifInMulticastPkts = stats.multicast;
counters->ifInBroadcastPkts = stats.rx_broadcast_packets;
counters->ifInDiscards = stats.rx_dropped;
counters->ifInErrors = stats.rx_errors;
counters->ifInUnknownProtos = -1;
counters->ifOutOctets = stats.tx_bytes;
counters->ifOutUcastPkts = stats.tx_packets;
counters->ifOutMulticastPkts = stats.tx_multicast_packets;
counters->ifOutBroadcastPkts = stats.tx_broadcast_packets;
counters->ifOutDiscards = stats.tx_dropped;
counters->ifOutErrors = stats.tx_errors;
counters->ifPromiscuousMode = 0;
SFLADD_ELEMENT(cs, &elem);
/* Include LACP counters and identifiers if this port is part of a LAG. */
if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
memset(&lacp_elem, 0, sizeof lacp_elem);
lacp_elem.tag = SFLCOUNTERS_LACP;
lacp_elem.counterBlock.lacp.actorSystemID =
lacp_stats.dot3adAggPortActorSystemID;
lacp_elem.counterBlock.lacp.partnerSystemID =
lacp_stats.dot3adAggPortPartnerOperSystemID;
lacp_elem.counterBlock.lacp.attachedAggID =
lacp_stats.dot3adAggPortAttachedAggID;
lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
lacp_stats.dot3adAggPortActorAdminState;
lacp_elem.counterBlock.lacp.portState.v.actorOper =
lacp_stats.dot3adAggPortActorOperState;
lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
lacp_stats.dot3adAggPortPartnerAdminState;
lacp_elem.counterBlock.lacp.portState.v.partnerOper =
lacp_stats.dot3adAggPortPartnerOperState;
lacp_elem.counterBlock.lacp.LACPDUsRx =
lacp_stats.dot3adAggPortStatsLACPDUsRx;
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
lacp_elem.counterBlock.lacp.illegalRx =
lacp_stats.dot3adAggPortStatsIllegalRx;
lacp_elem.counterBlock.lacp.LACPDUsTx =
lacp_stats.dot3adAggPortStatsLACPDUsTx;
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
SFLADD_ELEMENT(cs, &lacp_elem);
}
/* Include Port name. */
if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
memset(&name_elem, 0, sizeof name_elem);
name_elem.tag = SFLCOUNTERS_PORTNAME;
name_elem.counterBlock.portName.portName.str = (char *)ifName;
name_elem.counterBlock.portName.portName.len = strlen(ifName);
SFLADD_ELEMENT(cs, &name_elem);
}
/* Include OpenFlow DPID and openflow port number. */
memset(&of_elem, 0, sizeof of_elem);
of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
of_elem.counterBlock.ofPort.datapath_id =
ofproto_get_datapath_id(dsp->ofport->ofproto);
of_elem.counterBlock.ofPort.port_no =
(OVS_FORCE uint32_t)dsp->ofport->ofp_port;
SFLADD_ELEMENT(cs, &of_elem);
/* Include ethernet counters */
memset(&eth_elem, 0, sizeof eth_elem);
eth_elem.tag = SFLCOUNTERS_ETHERNET;
eth_counters = &eth_elem.counterBlock.ethernet;
eth_counters->dot3StatsAlignmentErrors = stats.rx_frame_errors;
eth_counters->dot3StatsFCSErrors = stats.rx_crc_errors;
eth_counters->dot3StatsFrameTooLongs = stats.rx_oversize_errors;
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSingleCollisionFrames);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsMultipleCollisionFrames);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSQETestErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsDeferredTransmissions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsLateCollisions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsExcessiveCollisions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacTransmitErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsCarrierSenseErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacReceiveErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSymbolErrors);
SFLADD_ELEMENT(cs, &eth_elem);
sfl_poller_writeCountersSample(poller, cs);
}
/* Obtains an address to use for the local sFlow agent and stores it into
* '*agent_addr'. Returns true if successful, false on failure.
*
* The sFlow agent address should be a local IP address that is persistent and
* reachable over the network, if possible. The IP address associated with
* 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
* address used to talk to the controller. If the agent device is not
* specified then it is figured out by taking a look at the routing table based
* on 'targets'. */
static bool
sflow_choose_agent_address(const char *agent_device,
const struct sset *targets,
const char *control_ip,
SFLAddress *agent_addr)
{
struct in6_addr ip;
if (agent_device) {
/* If 'agent_device' is the name of a network device, use its IP
* address. */
if (!netdev_get_ip_by_name(agent_device, &ip)) {
goto success;
}
/* If 'agent_device' is itself an IP address, use it. */
struct sockaddr_storage ss;
if (inet_parse_address(agent_device, &ss)) {
ip = ss_get_address(&ss);
goto success;
}
}
/* Otherwise, use an appropriate local IP address for one of the
* collectors' remote IP addresses. */
const char *target;
SSET_FOR_EACH (target, targets) {
struct sockaddr_storage ss;
if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT,
&ss, true, NULL)) {
/* sFlow only supports target in default routing table with
* packet mark zero.
*/
struct in6_addr target_ip = ss_get_address(&ss);
struct in6_addr gw, src = in6addr_any;
char name[IFNAMSIZ];
if (ovs_router_lookup(0, &target_ip, name, &src, &gw)) {
ip = src;
goto success;
}
}
}
struct sockaddr_storage ss;
if (control_ip && inet_parse_address(control_ip, &ss)) {
ip = ss_get_address(&ss);
goto success;
}
VLOG_ERR("could not determine IP address for sFlow agent");
return false;
success:
memset(agent_addr, 0, sizeof *agent_addr);
if (IN6_IS_ADDR_V4MAPPED(&ip)) {
agent_addr->type = SFLADDRESSTYPE_IP_V4;
agent_addr->address.ip_v4.addr
= (OVS_FORCE uint32_t) in6_addr_get_mapped_ipv4(&ip);
} else {
agent_addr->type = SFLADDRESSTYPE_IP_V6;
memcpy(agent_addr->address.ip_v6.addr, ip.s6_addr,
sizeof agent_addr->address.ip_v6.addr);
}
return true;
}
static void
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
{
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
free(ds->sflow_agent);
ds->sflow_agent = NULL;
}
collectors_destroy(ds->collectors);
ds->collectors = NULL;
ofproto_sflow_options_destroy(ds->options);
ds->options = NULL;
/* Turn off sampling to save CPU cycles. */
ds->probability = 0;
}
void
dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
dpif_sflow_clear__(ds);
ovs_mutex_unlock(&mutex);
}
bool
dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
bool enabled;
ovs_mutex_lock(&mutex);
enabled = ds->collectors != NULL;
ovs_mutex_unlock(&mutex);
return enabled;
}
struct dpif_sflow *
dpif_sflow_create(void)
{
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
struct dpif_sflow *ds;
if (ovsthread_once_start(&once)) {
ovs_mutex_init_recursive(&mutex);
ovsthread_once_done(&once);
}
ds = xcalloc(1, sizeof *ds);
ds->next_tick = time_now() + 1;
hmap_init(&ds->ports);
ds->probability = 0;
ovs_refcount_init(&ds->ref_cnt);
return ds;
}
struct dpif_sflow *
dpif_sflow_ref(const struct dpif_sflow *ds_)
{
struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
if (ds) {
ovs_refcount_ref(&ds->ref_cnt);
}
return ds;
}
/* 32-bit fraction of packets to sample with. A value of 0 samples no packets,
* a value of %UINT32_MAX samples all packets and intermediate values sample
* intermediate fractions of packets. */
uint32_t
dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
uint32_t probability;
ovs_mutex_lock(&mutex);
probability = ds->probability;
ovs_mutex_unlock(&mutex);
return probability;
}
void
dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
struct dpif_sflow_port *dsp;
dpif_sflow_clear(ds);
HMAP_FOR_EACH_SAFE (dsp, hmap_node, &ds->ports) {
dpif_sflow_del_port__(ds, dsp);
}
hmap_destroy(&ds->ports);
free(ds);
}
}
static void
dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
OVS_REQUIRES(mutex)
{
SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
sflow_agent_get_counters);
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
static enum dpif_sflow_tunnel_type
dpif_sflow_tunnel_type(struct ofport *ofport) {
const char *type = netdev_get_type(ofport->netdev);
if (type) {
if (strcmp(type, "gre") == 0) {
return DPIF_SFLOW_TUNNEL_GRE;
} else if (strcmp(type, "vxlan") == 0) {
return DPIF_SFLOW_TUNNEL_VXLAN;
} else if (strcmp(type, "geneve") == 0) {
return DPIF_SFLOW_TUNNEL_GENEVE;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
return DPIF_SFLOW_TUNNEL_UNKNOWN;
}
static uint8_t
dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type)
{
/* Default to 0 (IPPROTO_IP), meaning "unknown". */
uint8_t ipproto = 0;
switch(tunnel_type) {
case DPIF_SFLOW_TUNNEL_GRE:
ipproto = IPPROTO_GRE;
break;
case DPIF_SFLOW_TUNNEL_VXLAN:
case DPIF_SFLOW_TUNNEL_GENEVE:
ipproto = IPPROTO_UDP;
case DPIF_SFLOW_TUNNEL_UNKNOWN:
break;
}
return ipproto;
}
void
dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
int ifindex;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
enum dpif_sflow_tunnel_type tunnel_type;
ovs_mutex_lock(&mutex);
dpif_sflow_del_port(ds, odp_port);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
tunnel_type = dpif_sflow_tunnel_type(ofport);
ifindex = netdev_get_ifindex(ofport->netdev);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ifindex <= 0
&& tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Not an ifindex port, and not a tunnel port either
* so do not add a cross-reference to it here.
*/
goto out;
}
/* Add to table of ports. */
dsp = xmalloc(sizeof *dsp);
dsp->ofport = ofport;
dsp->odp_port = odp_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
dsp->tunnel_type = tunnel_type;
hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ifindex > 0) {
/* Add poller for ports that have ifindex. */
SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
if (ds->sflow_agent) {
dpif_sflow_add_poller(ds, dsp);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
} else {
/* Record "ifindex unknown" for the others */
SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
}
out:
ovs_mutex_unlock(&mutex);
}
static void
dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
OVS_REQUIRES(mutex)
{
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ds->sflow_agent
&& SFL_DS_INDEX(dsp->dsi)) {
sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
}
hmap_remove(&ds->ports, &dsp->hmap_node);
free(dsp);
}
void
dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
ovs_mutex_lock(&mutex);
dsp = dpif_sflow_find_port(ds, odp_port);
if (dsp) {
dpif_sflow_del_port__(ds, dsp);
}
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_set_options(struct dpif_sflow *ds,
const struct ofproto_sflow_options *options)
OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
bool options_changed;
SFLReceiver *receiver;
SFLAddress agentIP;
time_t now;
SFLDataSource_instance dsi;
uint32_t dsIndex;
SFLSampler *sampler;
SFLPoller *poller;
ovs_mutex_lock(&mutex);
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
/* No point in doing any work if there are no targets or nothing to
* sample. */
dpif_sflow_clear__(ds);
goto out;
}
options_changed = (!ds->options
|| !ofproto_sflow_options_equal(options, ds->options));
/* Configure collectors if options have changed or if we're shortchanged in
* collectors (which indicates that opening one or more of the configured
* collectors failed, so that we should retry). */
if (options_changed
|| collectors_count(ds->collectors) < sset_count(&options->targets)) {
collectors_destroy(ds->collectors);
collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
&ds->collectors);
if (ds->collectors == NULL) {
VLOG_WARN_RL(&rl, "no collectors could be initialized, "
"sFlow disabled");
dpif_sflow_clear__(ds);
goto out;
}
}
/* Choose agent IP address and agent device (if not yet setup) */
if (!sflow_choose_agent_address(options->agent_device,
&options->targets,
options->control_ip, &agentIP)) {
dpif_sflow_clear__(ds);
goto out;
}
/* Avoid reconfiguring if options didn't change. */
if (!options_changed) {
goto out;
}
ofproto_sflow_options_destroy(ds->options);
ds->options = ofproto_sflow_options_clone(options);
/* Create agent. */
VLOG_INFO("creating sFlow agent %d", options->sub_id);
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
}
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
now = time_wall();
sfl_agent_init(ds->sflow_agent,
&agentIP,
options->sub_id,
now, /* Boot time. */
now, /* Current time. */
ds, /* Pointer supplied to callbacks. */
sflow_agent_alloc_cb,
sflow_agent_free_cb,
sflow_agent_error_cb,
sflow_agent_send_packet_cb);
receiver = sfl_agent_addReceiver(ds->sflow_agent);
sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
sfl_receiver_set_sFlowRcvrTimeout(receiver, UINT32_MAX);
/* Set the sampling_rate down in the datapath. */
ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);
/* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
because it is associated with the hypervisor, and interacts with the server
hardware directly. The sub_id is used to distinguish this sampler from
others on other bridges within the same agent. */
dsIndex = 1000 + options->sub_id;
SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate);
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
/* Add a counter poller for the bridge so we can use it to send
global counters such as datapath cache hit/miss stats. */
poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
sflow_agent_get_global_counters);
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
/* Add pollers for the currently known ifindex-ports */
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (SFL_DS_INDEX(dsp->dsi)) {
dpif_sflow_add_poller(ds, dsp);
}
}
out:
ovs_mutex_unlock(&mutex);
}
int
dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
int ret;
ovs_mutex_lock(&mutex);
dsp = dpif_sflow_find_port(ds, odp_port);
ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0;
ovs_mutex_unlock(&mutex);
return ret;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
static void
dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
const struct flow_tnl *tunnel,
SFLSampled_ipv4 *ipv4)
{
ipv4->protocol = tunnel_ipproto;
ipv4->tos = tunnel->ip_tos;
ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
}
static void
dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
ovs_be32 lse)
{
if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Record the new lse in host-byte-order. */
/* BOS flag will be fixed later when we send stack to sFlow library. */
sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
}
static void
dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
{
if (sflow_actions->mpls_stack_depth == 0) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
sflow_actions->mpls_stack_depth--;
}
static void
dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
const struct ovs_key_mpls *mpls_key, int n)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
int ii;
if (n > FLOW_MAX_MPLS_LABELS) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
for (ii = 0; ii < n; ii++) {
/* Reverse stack order, and use host-byte-order for each lse. */
sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
sflow_actions->mpls_stack_depth = n;
}
static void
sflow_read_tnl_push_action(const struct nlattr *attr,
struct dpif_sflow_actions *sflow_actions)
{
/* Modeled on lib/odp-util.c: format_odp_tnl_push_header */
const struct ovs_action_push_tnl *data = nl_attr_get(attr);
const struct eth_header *eth = (const struct eth_header *) data->header;
const struct ip_header *ip
= ALIGNED_CAST(const struct ip_header *, eth + 1);
sflow_actions->out_port = data->out_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Ethernet. */
/* TODO: SFlow does not currently define a MAC-in-MAC
* encapsulation structure. We could use an extension
* structure to report this.
*/
/* IPv4 */
/* Cannot assume alignment so just use memcpy. */
sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
sflow_actions->tunnel.ip_tos = ip->ip_tos;
sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
/* The tnl_push action can supply the ip_protocol too. */
sflow_actions->tunnel_ipproto = ip->ip_proto;
/* Layer 4 */
if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
|| data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
const struct udp_header *udp = (const struct udp_header *) (ip + 1);
sflow_actions->tunnel.tp_src = udp->udp_src;
sflow_actions->tunnel.tp_dst = udp->udp_dst;
if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
sflow_actions->tunnel.tun_id = htonll(tun_id);
} else {
const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
sflow_actions->tunnel.tun_id = htonll(tun_id);
}
} else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
const void *l4 = ip + 1;
const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
if (greh->flags & htons(GRE_CSUM)) {
options++;
}
if (greh->flags & htons(GRE_KEY)) {
uint64_t tun_id = ntohl(get_16aligned_be32(options));
sflow_actions->tunnel.tun_id = htonll(tun_id);
}
}
}
static void
sflow_read_set_action(const struct nlattr *attr,
struct dpif_sflow_actions *sflow_actions)
{
enum ovs_key_attr type = nl_attr_type(attr);
switch (type) {
case OVS_KEY_ATTR_ENCAP:
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
dpif_sflow_read_actions(NULL,
nl_attr_get(attr), nl_attr_get_size(attr),
sflow_actions, true);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
break;
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_DP_HASH:
case OVS_KEY_ATTR_RECIRC_ID:
break;
case OVS_KEY_ATTR_TUNNEL: {
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel, NULL)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
== ODP_FIT_ERROR) {
/* Tunnel parsing error. */
sflow_actions->tunnel_err = true;
}
}
break;
}
case OVS_KEY_ATTR_IN_PORT:
case OVS_KEY_ATTR_ETHERNET:
case OVS_KEY_ATTR_VLAN:
break;
case OVS_KEY_ATTR_MPLS: {
const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
size_t size = nl_attr_get_size(attr);
dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
break;
}
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IPV4:
if (sflow_actions->encap_depth == 1) {
const struct ovs_key_ipv4 *key = nl_attr_get(attr);
if (key->ipv4_src) {
sflow_actions->tunnel.ip_src = key->ipv4_src;
}
if (key->ipv4_dst) {
sflow_actions->tunnel.ip_dst = key->ipv4_dst;
}
if (key->ipv4_proto) {
sflow_actions->tunnel_ipproto = key->ipv4_proto;
}
if (key->ipv4_tos) {
sflow_actions->tunnel.ip_tos = key->ipv4_tos;
}
if (key->ipv4_ttl) {
sflow_actions->tunnel.ip_ttl = key->ipv4_ttl;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
}
break;
case OVS_KEY_ATTR_IPV6:
/* TODO: parse IPv6 encap. */
break;
/* These have the same structure and format. */
case OVS_KEY_ATTR_TCP:
case OVS_KEY_ATTR_UDP:
case OVS_KEY_ATTR_SCTP:
if (sflow_actions->encap_depth == 1) {
const struct ovs_key_tcp *key = nl_attr_get(attr);
if (key->tcp_src) {
sflow_actions->tunnel.tp_src = key->tcp_src;
}
if (key->tcp_dst) {
sflow_actions->tunnel.tp_dst = key->tcp_dst;
}
}
break;
case OVS_KEY_ATTR_TCP_FLAGS:
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_ARP:
case OVS_KEY_ATTR_ND:
case OVS_KEY_ATTR_ND_EXTENSIONS:
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
datapath: Add original direction conntrack tuple to sw_flow_key. Upstream commit: commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc Author: Jarno Rajahalme <jarno@ovn.org> Date: Thu Feb 9 11:21:59 2017 -0800 openvswitch: Add original direction conntrack tuple to sw_flow_key. Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections reply packets (as well as original direction packets), and both direction packets of any related connections may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. While existence of a conntrack entry is required for admission of the return or related packets, policy changes can render connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using the original direction 5-tuple matching the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same or different than that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> This patch squashes in minimal amount of OVS userspace code to not break the build. Later patches contain the full userspace support. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-08 17:18:22 -08:00
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
case OVS_KEY_ATTR_UNSPEC:
userspace: Switching of L3 packets in L2 pipeline Ports have a new layer3 attribute if they send/receive L3 packets. The packet_type included in structs dp_packet and flow is considered in ofproto-dpif. The classical L2 match fields (dl_src, dl_dst, dl_type, and vlan_tci, vlan_vid, vlan_pcp) now have Ethernet as pre-requisite. A dummy ethernet header is pushed to L3 packets received from L3 ports before the the pipeline processing starts. The ethernet header is popped before sending a packet to a L3 port. For datapath ports that can receive L2 or L3 packets, the packet_type becomes part of the flow key for datapath flows and is handled appropriately in dpif-netdev. In the 'else' branch in flow_put_on_pmd() function, the additional check flow_equal(&match.flow, &netdev_flow->flow) was removed, as a) the dpcls lookup is sufficient to uniquely identify a flow and b) it caused false negatives because the flow in netdev->flow may not properly masked. In dpif_netdev_flow_put() we now use the same method for constructing the netdev_flow_key as the one used when adding the flow to the dplcs to make sure these always match. The function netdev_flow_key_from_flow() used so far was not only inefficient but sometimes caused mismatches and subsequent flow update failures. The kernel datapath does not support the packet_type match field. Instead it encodes the packet type implictly by the presence or absence of the Ethernet attribute in the flow key and mask. This patch filters the PACKET_TYPE attribute out of netlink flow key and mask to be sent to the kernel datapath. Signed-off-by: Lorand Jakab <lojakab@cisco.com> Signed-off-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: Jiri Benc <jbenc@redhat.com> Signed-off-by: Yi Yang <yi.y.yang@intel.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-06-02 16:16:17 +00:00
case OVS_KEY_ATTR_PACKET_TYPE:
case OVS_KEY_ATTR_NSH:
openvswitch.h: Align uAPI definition with the kernel. Upstream commit: commit 1926407a4ab0e59d5a27bed7b82029b356d80fa0 Author: Ilya Maximets <i.maximets@ovn.org> Date: Wed Mar 9 23:20:33 2022 +0100 net: openvswitch: fix uAPI incompatibility with existing user space Few years ago OVS user space made a strange choice in the commit [1] to define types only valid for the user space inside the copy of a kernel uAPI header. '#ifndef __KERNEL__' and another attribute was added later. This leads to the inevitable clash between user space and kernel types when the kernel uAPI is extended. The issue was unveiled with the addition of a new type for IPv6 extension header in kernel uAPI. When kernel provides the OVS_KEY_ATTR_IPV6_EXTHDRS attribute to the older user space application, application tries to parse it as OVS_KEY_ATTR_PACKET_TYPE and discards the whole netlink message as malformed. Since OVS_KEY_ATTR_IPV6_EXTHDRS is supplied along with every IPv6 packet that goes to the user space, IPv6 support is fully broken. Fixing that by bringing these user space attributes to the kernel uAPI to avoid the clash. Strictly speaking this is not the problem of the kernel uAPI, but changing it is the only way to avoid breakage of the older user space applications at this point. These 2 types are explicitly rejected now since they should not be passed to the kernel. Additionally, OVS_KEY_ATTR_TUNNEL_INFO moved out from the '#ifdef __KERNEL__' as there is no good reason to hide it from the userspace. And it's also explicitly rejected now, because it's for in-kernel use only. Comments with warnings were added to avoid the problem coming back. (1 << type) converted to (1ULL << type) to avoid integer overflow on OVS_KEY_ATTR_IPV6_EXTHDRS, since it equals 32 now. [1] beb75a40fdc2 ("userspace: Switching of L3 packets in L2 pipeline") Fixes: 28a3f0601727 ("net: openvswitch: IPv6: Add IPv6 extension header support") Link: https://lore.kernel.org/netdev/3adf00c7-fe65-3ef4-b6d7-6d8a0cad8a5f@nvidia.com Link: https://github.com/openvswitch/ovs/commit/beb75a40fdc295bfd6521b0068b4cd12f6de507c Reported-by: Roi Dayan <roid@nvidia.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Acked-by: Aaron Conole <aconole@redhat.com> Link: https://lore.kernel.org/r/20220309222033.3018976-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> Not adding OVS_KEY_ATTR_IPV6_EXTHDRS in this commit as this is not necessary. Will be added along with the actual userspace implementation. This change should help avoiding incompatibility issues in the future. Acked-by: Aaron Conole <aconole@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2022-03-25 14:48:23 +01:00
case OVS_KEY_ATTR_TUNNEL_INFO:
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
case __OVS_KEY_ATTR_MAX:
default:
break;
}
}
static void
dpif_sflow_capture_input_mpls(const struct flow *flow,
struct dpif_sflow_actions *sflow_actions)
{
if (eth_type_mpls(flow->dl_type)) {
int depth = 0;
int ii;
ovs_be32 lse;
/* Calculate depth by detecting BOS. */
for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
lse = flow->mpls_lse[ii];
depth++;
if (lse & htonl(MPLS_BOS_MASK)) {
break;
}
}
/* Capture stack, reversing stack order, and
* using host-byte-order for each lse. BOS flag
* is ignored for now. It is set later when
* the output stack is encoded.
*/
for (ii = 0; ii < depth; ii++) {
lse = flow->mpls_lse[ii];
sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
}
sflow_actions->mpls_stack_depth = depth;
}
}
void
dpif_sflow_read_actions(const struct flow *flow,
const struct nlattr *actions, size_t actions_len,
struct dpif_sflow_actions *sflow_actions,
bool capture_mpls)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
const struct nlattr *a;
unsigned int left;
if (actions_len == 0) {
/* Packet dropped.*/
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
if (flow != NULL && capture_mpls == true) {
/* Make sure the MPLS output stack
* is seeded with the input stack.
*/
dpif_sflow_capture_input_mpls(flow, sflow_actions);
/* XXX when 802.1AD(QinQ) is supported then
* we can do the same with VLAN stacks here
*/
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
enum ovs_action_attr type = nl_attr_type(a);
switch (type) {
case OVS_ACTION_ATTR_OUTPUT:
/* Capture the output port in case we need it
* to get the output tunnel type.
*/
sflow_actions->out_port = nl_attr_get_odp_port(a);
break;
case OVS_ACTION_ATTR_TUNNEL_POP:
/* XXX: Do not handle this for now. It's not clear
* if we should start with encap_depth == 1 when we
* see an input tunnel, or if we should assume
* that the input tunnel was always "popped" if it
* was presented to us decoded in flow->tunnel?
*
* If we do handle this it might look like this,
* as we clear the captured tunnel info and decrement
* the encap_depth:
*
* memset(&sflow_actions->tunnel, 0, sizeof struct flow_tnl);
* sflow_actions->tunnel_ipproto = 0;
* --sflow_actions->encap_depth;
*
* but for now just disable the tunnel annotation:
*/
sflow_actions->tunnel_err = true;
break;
case OVS_ACTION_ATTR_TUNNEL_PUSH:
/* XXX: This actions appears to come with it's own
* OUTPUT action, so should it be regarded as having
* an implicit "pop" following it too? Put another
* way, would two tnl_push() actions in succession
* result in a packet with two layers of encap?
*/
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
sflow_read_tnl_push_action(a, sflow_actions);
}
break;
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_HASH:
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
case OVS_ACTION_ATTR_CT:
userspace: Avoid dp_hash recirculation for balance-tcp bond mode. Problem: In OVS, flows with output over a bond interface of type “balance-tcp” gets translated by the ofproto layer into "HASH" and "RECIRC" datapath actions. After recirculation, the packet is forwarded to the bond member port based on 8-bits of the datapath hash value computed through dp_hash. This causes performance degradation in the following ways: 1. The recirculation of the packet implies another lookup of the packet’s flow key in the exact match cache (EMC) and potentially Megaflow classifier (DPCLS). This is the biggest cost factor. 2. The recirculated packets have a new “RSS” hash and compete with the original packets for the scarce number of EMC slots. This implies more EMC misses and potentially EMC thrashing causing costly DPCLS lookups. 3. The 256 extra megaflow entries per bond for dp_hash bond selection put additional load on the revalidation threads. Owing to this performance degradation, deployments stick to “balance-slb” bond mode even though it does not do active-active load balancing for VXLAN- and GRE-tunnelled traffic because all tunnel packet have the same source MAC address. Proposed optimization: This proposal introduces a new load-balancing output action instead of recirculation. Maintain one table per-bond (could just be an array of uint16's) and program it the same way internal flows are created today for each possible hash value (256 entries) from ofproto layer. Use this table to load-balance flows as part of output action processing. Currently xlate_normal() -> output_normal() -> bond_update_post_recirc_rules() -> bond_may_recirc() and compose_output_action__() generate 'dp_hash(hash_l4(0))' and 'recirc(<RecircID>)' actions. In this case the RecircID identifies the bond. For the recirculated packets the ofproto layer installs megaflow entries that match on RecircID and masked dp_hash and send them to the corresponding output port. Instead, we will now generate action as 'lb_output(<bond id>)' This combines hash computation (only if needed, else re-use RSS hash) and inline load-balancing over the bond. This action is used *only* for balance-tcp bonds in userspace datapath (the OVS kernel datapath remains unchanged). Example: Current scheme: With 8 UDP flows (with random UDP src port): flow-dump from pmd on cpu core: 2 recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1) recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1 New scheme: We can do with a single flow entry (for any number of new flows): in_port(7),<...> actions:lb_output(1) A new CLI has been added to dump datapath bond cache as given below. # ovs-appctl dpif-netdev/bond-show [dp] Bond cache: bond-id 1 : bucket 0 - slave 2 bucket 1 - slave 1 bucket 2 - slave 2 bucket 3 - slave 1 Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com> Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com> Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com> Tested-by: Matteo Croce <mcroce@redhat.com> Tested-by: Adrian Moreno <amorenoz@redhat.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-05-22 10:50:05 +02:00
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_METER:
userspace: Avoid dp_hash recirculation for balance-tcp bond mode. Problem: In OVS, flows with output over a bond interface of type “balance-tcp” gets translated by the ofproto layer into "HASH" and "RECIRC" datapath actions. After recirculation, the packet is forwarded to the bond member port based on 8-bits of the datapath hash value computed through dp_hash. This causes performance degradation in the following ways: 1. The recirculation of the packet implies another lookup of the packet’s flow key in the exact match cache (EMC) and potentially Megaflow classifier (DPCLS). This is the biggest cost factor. 2. The recirculated packets have a new “RSS” hash and compete with the original packets for the scarce number of EMC slots. This implies more EMC misses and potentially EMC thrashing causing costly DPCLS lookups. 3. The 256 extra megaflow entries per bond for dp_hash bond selection put additional load on the revalidation threads. Owing to this performance degradation, deployments stick to “balance-slb” bond mode even though it does not do active-active load balancing for VXLAN- and GRE-tunnelled traffic because all tunnel packet have the same source MAC address. Proposed optimization: This proposal introduces a new load-balancing output action instead of recirculation. Maintain one table per-bond (could just be an array of uint16's) and program it the same way internal flows are created today for each possible hash value (256 entries) from ofproto layer. Use this table to load-balance flows as part of output action processing. Currently xlate_normal() -> output_normal() -> bond_update_post_recirc_rules() -> bond_may_recirc() and compose_output_action__() generate 'dp_hash(hash_l4(0))' and 'recirc(<RecircID>)' actions. In this case the RecircID identifies the bond. For the recirculated packets the ofproto layer installs megaflow entries that match on RecircID and masked dp_hash and send them to the corresponding output port. Instead, we will now generate action as 'lb_output(<bond id>)' This combines hash computation (only if needed, else re-use RSS hash) and inline load-balancing over the bond. This action is used *only* for balance-tcp bonds in userspace datapath (the OVS kernel datapath remains unchanged). Example: Current scheme: With 8 UDP flows (with random UDP src port): flow-dump from pmd on cpu core: 2 recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1) recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1 New scheme: We can do with a single flow entry (for any number of new flows): in_port(7),<...> actions:lb_output(1) A new CLI has been added to dump datapath bond cache as given below. # ovs-appctl dpif-netdev/bond-show [dp] Bond cache: bond-id 1 : bucket 0 - slave 2 bucket 1 - slave 1 bucket 2 - slave 2 bucket 3 - slave 1 Co-authored-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com> Signed-off-by: Manohar Krishnappa Chidambaraswamy <manukc@gmail.com> Signed-off-by: Vishal Deep Ajmera <vishal.deep.ajmera@ericsson.com> Tested-by: Matteo Croce <mcroce@redhat.com> Tested-by: Adrian Moreno <amorenoz@redhat.com> Acked-by: Eelco Chaudron <echaudro@redhat.com> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2020-05-22 10:50:05 +02:00
case OVS_ACTION_ATTR_LB_OUTPUT:
break;
case OVS_ACTION_ATTR_SET_MASKED:
/* TODO: apply mask. XXX: Are we likely to see this? */
break;
case OVS_ACTION_ATTR_SET:
sflow_read_set_action(nl_attr_get(a), sflow_actions);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
/* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
* construct a VLAN-stack. The sFlow user-action cookie already
* captures the egress VLAN ID so there is nothing more to do here.
*/
break;
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
if (mpls) {
dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
}
break;
}
case OVS_ACTION_ATTR_POP_MPLS: {
dpif_sflow_pop_mpls_lse(sflow_actions);
break;
}
case OVS_ACTION_ATTR_PUSH_ETH:
case OVS_ACTION_ATTR_POP_ETH:
/* TODO: SFlow does not currently define a MAC-in-MAC
* encapsulation structure. We could use an extension
* structure to report this.
*/
break;
case OVS_ACTION_ATTR_CLONE:
if (flow != NULL) {
dpif_sflow_read_actions(flow, nl_attr_get(a), nl_attr_get_size(a),
sflow_actions, false);
}
break;
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_UNSPEC:
Add a new OVS action check_pkt_larger This patch adds a new action 'check_pkt_larger' which checks if the packet is larger than the given size and stores the result in the destination register. Usage: check_pkt_larger(len)->REGISTER Eg. match=...,actions=check_pkt_larger(1442)->NXM_NX_REG0[0],next; This patch makes use of the new datapath action - 'check_pkt_len' which was recently added in the commit [1]. At the start of ovs-vswitchd, datapath is probed for this action. If the datapath action is present, then 'check_pkt_larger' makes use of this datapath action. Datapath action 'check_pkt_len' takes these nlattrs * OVS_CHECK_PKT_LEN_ATTR_PKT_LEN - 'pkt_len' to check for * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER (optional) - Nested actions to apply if the packet length is greater than the specified 'pkt_len' * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL (optional) - Nested actions to apply if the packet length is lesser or equal to the specified 'pkt_len'. Let's say we have these flows added to an OVS bridge br-int table=0, priority=100 in_port=1,ip,actions=check_pkt_larger:100->NXM_NX_REG0[0],resubmit(,1) table=1, priority=200,in_port=1,ip,reg0=0x1/0x1 actions=output:3 table=1, priority=100,in_port=1,ip,actions=output:4 Then the action 'check_pkt_larger' will be translated as - check_pkt_len(size=100,gt(3),le(4)) datapath will check the packet length and if the packet length is greater than 100, it will output to port 3, else it will output to port 4. In case, datapath doesn't support 'check_pkt_len' action, the OVS action 'check_pkt_larger' sets SLOW_ACTION so that datapath flow is not added. This OVS action is intended to be used by OVN to check the packet length and generate an ICMP packet with type 3, code 4 and next hop mtu in the logical router pipeline if the MTU of the physical interface is lesser than the packet length. More information can be found here [2] [1] - https://kernel.googlesource.com/pub/scm/linux/kernel/git/davem/net-next/+/4d5ec89fc8d14dcdab7214a0c13a1c7321dc6ea9 [2] - https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-July/047039.html Suggested-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Numan Siddique <nusiddiq@redhat.com> CC: Ben Pfaff <blp@ovn.org> CC: Gregory Rose <gvrose8192@gmail.com> Acked-by: Mark Michelson <mmichels@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-04-23 00:53:38 +05:30
case OVS_ACTION_ATTR_CHECK_PKT_LEN:
userspace: Improved packet drop statistics. Currently OVS maintains explicit packet drop/error counters only on port level. Packets that are dropped as part of normal OpenFlow processing are counted in flow stats of “drop” flows or as table misses in table stats. These can only be interpreted by controllers that know the semantics of the configured OpenFlow pipeline. Without that knowledge, it is impossible for an OVS user to obtain e.g. the total number of packets dropped due to OpenFlow rules. Furthermore, there are numerous other reasons for which packets can be dropped by OVS slow path that are not related to the OpenFlow pipeline. The generated datapath flow entries include a drop action to avoid further expensive upcalls to the slow path, but subsequent packets dropped by the datapath are not accounted anywhere. Finally, the datapath itself drops packets in certain error situations. Also, these drops are today not accounted for.This makes it difficult for OVS users to monitor packet drop in an OVS instance and to alert a management system in case of a unexpected increase of such drops. Also OVS trouble-shooters face difficulties in analysing packet drops. With this patch we implement following changes to address the issues mentioned above. 1. Identify and account all the silent packet drop scenarios 2. Display these drops in ovs-appctl coverage/show Co-authored-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Co-authored-by: Keshav Gupta <keshugupta1@gmail.com> Signed-off-by: Anju Thomas <anju.thomas@ericsson.com> Signed-off-by: Rohith Basavaraja <rohith.basavaraja@gmail.com> Signed-off-by: Keshav Gupta <keshugupta1@gmail.com> Acked-by: Eelco Chaudron <echaudro@redhat.com Acked-by: Ben Pfaff <blp@ovn.org> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2019-12-18 05:48:12 +01:00
case OVS_ACTION_ATTR_DROP:
2021-11-29 11:52:05 +05:30
case OVS_ACTION_ATTR_ADD_MPLS:
case OVS_ACTION_ATTR_DEC_TTL:
case OVS_ACTION_ATTR_PSAMPLE:
case __OVS_ACTION_ATTR_MAX:
default:
break;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
}
static void
dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
uint32_t *mpls_lse_buf,
const struct dpif_sflow_actions *sflow_actions)
{
/* Put the MPLS stack back into "packet header" order,
* and make sure the BOS flag is set correctly on the last
* one. Each lse is still in host-byte-order.
*/
int ii;
uint32_t lse;
stack->depth = sflow_actions->mpls_stack_depth;
stack->stack = mpls_lse_buf;
for (ii = 0; ii < stack->depth; ii++) {
lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
}
stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
}
/* Extract the output port count from the user action cookie.
* See http://sflow.org/sflow_version_5.txt "Input/Output port information"
*/
static uint32_t
dpif_sflow_cookie_num_outputs(const struct user_action_cookie *cookie)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
uint32_t format = cookie->sflow.output & 0xC0000000;
uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
if (format == 0) {
return port_n ? 1 : 0;
}
else if (format == 0x80000000) {
return port_n;
}
return 0;
}
void
dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
const struct flow *flow, odp_port_t odp_in_port,
const struct user_action_cookie *cookie,
const struct dpif_sflow_actions *sflow_actions)
OVS_EXCLUDED(mutex)
{
SFL_FLOW_SAMPLE_TYPE fs;
SFLFlow_sample_element hdrElem;
SFLSampled_header *header;
SFLFlow_sample_element switchElem;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
uint8_t tnlInProto, tnlOutProto;
SFLFlow_sample_element tnlInElem, tnlOutElem;
SFLFlow_sample_element vniInElem, vniOutElem;
SFLFlow_sample_element mplsElem;
uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
sflow: Fix sFlow sampling structure. According to Neil McKee, in an email archived at http://openvswitch.org/pipermail/dev_openvswitch.org/2010-January/000934.html: The containment rule is that a given sflow-datasource (sampler or poller) should be scoped within only one sflow-agent (or sub-agent). So the issue arrises when you have two switches/datapaths defined on the same host being managed with the same IP address: each switch is a separate sub-agent, so they can run independently (e.g. with their own sequence numbers) but they can't both claim to speak for the same sflow-datasource. Specifically, they can't both represent the <ifindex>:0 data-source. This containment rule is necessary so that the sFlow collector can scale and combine the results accurately. One option would be to stick with the <ifindex>:0 data-source but elevate it to be global across all bridges, with a global sample_pool and a global sflow_agent. Not tempting. Better to go the other way and allow each interface to have it's own sampler, just as it already has it's own poller. The ifIndex numbers are globally unique across all switches/datapaths on the host, so the containment is now clean. Datasource <ifindex>:5 might be on one switch, whille <ifindex>:7 can be on another. Other benefits are that 1) you can support the option of overriding the default sampling-rate on an interface-by-interface basis, and 2) this is how most sFlow implementations are coded, so there will be no surprises or interoperability issues with any sFlow collectors out there. This commit implements the approach suggested by Neil. This commit uses an atomic_t to represent the sampling pool. This is because we do want access to it to be atomic, but we expect that it will "mostly" be accessed from a single CPU at a time. Perhaps this is a bad assumption; we can always switch to another form of synchronization later. CC: Neil McKee <neil.mckee@inmon.com>
2010-01-20 13:52:42 -08:00
SFLSampler *sampler;
struct dpif_sflow_port *in_dsp;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
struct dpif_sflow_port *out_dsp;
ovs_be16 vlan_tci;
ovs_mutex_lock(&mutex);
if (!ds->sflow_agent || !ds->sflow_agent->samplers) {
goto out;
}
sampler = ds->sflow_agent->samplers;
/* Build a flow sample. */
memset(&fs, 0, sizeof fs);
/* Look up the input ifIndex if this port has one. Otherwise just
* leave it as 0 (meaning 'unknown') and continue. */
in_dsp = dpif_sflow_find_port(ds, odp_in_port);
if (in_dsp) {
fs.input = SFL_DS_INDEX(in_dsp->dsi);
sflow: Fix sFlow sampling structure. According to Neil McKee, in an email archived at http://openvswitch.org/pipermail/dev_openvswitch.org/2010-January/000934.html: The containment rule is that a given sflow-datasource (sampler or poller) should be scoped within only one sflow-agent (or sub-agent). So the issue arrises when you have two switches/datapaths defined on the same host being managed with the same IP address: each switch is a separate sub-agent, so they can run independently (e.g. with their own sequence numbers) but they can't both claim to speak for the same sflow-datasource. Specifically, they can't both represent the <ifindex>:0 data-source. This containment rule is necessary so that the sFlow collector can scale and combine the results accurately. One option would be to stick with the <ifindex>:0 data-source but elevate it to be global across all bridges, with a global sample_pool and a global sflow_agent. Not tempting. Better to go the other way and allow each interface to have it's own sampler, just as it already has it's own poller. The ifIndex numbers are globally unique across all switches/datapaths on the host, so the containment is now clean. Datasource <ifindex>:5 might be on one switch, whille <ifindex>:7 can be on another. Other benefits are that 1) you can support the option of overriding the default sampling-rate on an interface-by-interface basis, and 2) this is how most sFlow implementations are coded, so there will be no surprises or interoperability issues with any sFlow collectors out there. This commit implements the approach suggested by Neil. This commit uses an atomic_t to represent the sampling pool. This is because we do want access to it to be atomic, but we expect that it will "mostly" be accessed from a single CPU at a time. Perhaps this is a bad assumption; we can always switch to another form of synchronization later. CC: Neil McKee <neil.mckee@inmon.com>
2010-01-20 13:52:42 -08:00
}
/* Make the assumption that the random number generator in the
* datapath converges to the configured mean, and just increment the
* samplePool by the configured sampling rate every time. */
sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);
/* Sampled header. */
memset(&hdrElem, 0, sizeof hdrElem);
hdrElem.tag = SFLFLOW_HEADER;
header = &hdrElem.flowType.header;
header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
/* The frame_length should include the Ethernet FCS (4 bytes),
* but it has already been stripped, so we need to add 4 here. */
header->frame_length = dp_packet_size(packet) + 4;
/* Ethernet FCS stripped off. */
header->stripped = 4;
header->header_length = MIN(dp_packet_size(packet),
datapath: Report kernel's flow key when passing packets up to userspace. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
sampler->sFlowFsMaximumHeaderSize);
header->header_bytes = dp_packet_data(packet);
/* Add extended switch element. */
memset(&switchElem, 0, sizeof(switchElem));
switchElem.tag = SFLFLOW_EX_SWITCH;
switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlans[0].tci);
switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlans[0].tci);
/* Retrieve data from user_action_cookie. */
vlan_tci = cookie->sflow.vlan_tci;
switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);
fs.output = cookie->sflow.output;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Input tunnel. */
if (flow->tunnel.ip_dst) {
memset(&tnlInElem, 0, sizeof(tnlInElem));
tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
tnlInProto = in_dsp ? dpif_sflow_tunnel_proto(in_dsp->tunnel_type) : 0;
dpif_sflow_tunnel_v4(tnlInProto,
&flow->tunnel,
&tnlInElem.flowType.ipv4);
SFLADD_ELEMENT(&fs, &tnlInElem);
if (flow->tunnel.tun_id) {
memset(&vniInElem, 0, sizeof(vniInElem));
vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
vniInElem.flowType.tunnel_vni.vni
= ntohll(flow->tunnel.tun_id);
SFLADD_ELEMENT(&fs, &vniInElem);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Output tunnel. */
if (sflow_actions
&& sflow_actions->encap_depth == 1
&& !sflow_actions->tunnel_err
&& dpif_sflow_cookie_num_outputs(cookie) == 1) {
tnlOutProto = sflow_actions->tunnel_ipproto;
if (tnlOutProto == 0) {
/* Try to infer the ip-protocol from the output port. */
if (sflow_actions->out_port != ODPP_NONE) {
out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
if (out_dsp) {
tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
}
}
}
memset(&tnlOutElem, 0, sizeof(tnlOutElem));
tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
dpif_sflow_tunnel_v4(tnlOutProto,
&sflow_actions->tunnel,
&tnlOutElem.flowType.ipv4);
SFLADD_ELEMENT(&fs, &tnlOutElem);
if (sflow_actions->tunnel.tun_id) {
memset(&vniOutElem, 0, sizeof(vniOutElem));
vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
vniOutElem.flowType.tunnel_vni.vni
= ntohll(sflow_actions->tunnel.tun_id);
SFLADD_ELEMENT(&fs, &vniOutElem);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* MPLS output label stack. */
if (sflow_actions
&& sflow_actions->mpls_stack_depth > 0
&& !sflow_actions->mpls_err
&& dpif_sflow_cookie_num_outputs(cookie) == 1) {
memset(&mplsElem, 0, sizeof(mplsElem));
mplsElem.tag = SFLFLOW_EX_MPLS;
dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
mpls_lse_buf,
sflow_actions);
SFLADD_ELEMENT(&fs, &mplsElem);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Submit the flow sample to be encoded into the next datagram. */
SFLADD_ELEMENT(&fs, &hdrElem);
SFLADD_ELEMENT(&fs, &switchElem);
sfl_sampler_writeFlowSample(sampler, &fs);
out:
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
if (ds->collectors != NULL) {
time_t now = time_now();
route_table_run();
if (now >= ds->next_tick) {
sfl_agent_tick(ds->sflow_agent, time_wall());
ds->next_tick = now + 1;
}
}
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
if (ds->collectors != NULL) {
poll_timer_wait_until(ds->next_tick * 1000LL);
}
ovs_mutex_unlock(&mutex);
}