2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-29 05:18:13 +00:00
ovs/ofproto/ofproto-dpif-sflow.c

1433 lines
48 KiB
C
Raw Normal View History

/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
* Copyright (c) 2009 InMon Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "ofproto-dpif-sflow.h"
#include <inttypes.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdlib.h>
#include "collectors.h"
#include "compiler.h"
#include "dpif.h"
#include "hash.h"
#include "openvswitch/hmap.h"
#include "netdev.h"
#include "netlink.h"
#include "openvswitch/ofpbuf.h"
#include "ofproto.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
#include "ovs-router.h"
#include "route-table.h"
#include "sflow_api.h"
#include "socket-util.h"
#include "timeval.h"
#include "openvswitch/vlog.h"
#include "lib/odp-util.h"
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
#include "lib/unaligned.h"
#include "ofproto-provider.h"
#include "lacp.h"
VLOG_DEFINE_THIS_MODULE(sflow);
static struct ovs_mutex mutex;
/* This global var is used to determine which sFlow
sub-agent should send the datapath counters. */
#define SFLOW_GC_SUBID_UNCLAIMED (uint32_t)-1
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/*
* The enum dpif_sflow_tunnel_type is to declare the types supported
*/
enum dpif_sflow_tunnel_type {
DPIF_SFLOW_TUNNEL_UNKNOWN = 0,
DPIF_SFLOW_TUNNEL_VXLAN,
DPIF_SFLOW_TUNNEL_GRE,
DPIF_SFLOW_TUNNEL_LISP,
DPIF_SFLOW_TUNNEL_GENEVE
};
struct dpif_sflow_port {
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
struct ofport *ofport; /* To retrive port stats. */
odp_port_t odp_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
enum dpif_sflow_tunnel_type tunnel_type;
};
struct dpif_sflow {
struct collectors *collectors;
SFLAgent *sflow_agent;
struct ofproto_sflow_options *options;
time_t next_tick;
size_t n_flood, n_all;
struct hmap ports; /* Contains "struct dpif_sflow_port"s. */
uint32_t probability;
struct ovs_refcount ref_cnt;
};
static void dpif_sflow_del_port__(struct dpif_sflow *,
struct dpif_sflow_port *);
#define RECEIVER_INDEX 1
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
static bool
ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
const struct ofproto_sflow_options *b)
{
return (sset_equals(&a->targets, &b->targets)
&& a->sampling_rate == b->sampling_rate
&& a->polling_interval == b->polling_interval
&& a->header_len == b->header_len
&& a->sub_id == b->sub_id
&& nullable_string_is_equal(a->agent_device, b->agent_device)
&& nullable_string_is_equal(a->control_ip, b->control_ip));
}
static struct ofproto_sflow_options *
ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
{
struct ofproto_sflow_options *new = xmemdup(old, sizeof *old);
sset_clone(&new->targets, &old->targets);
new->agent_device = nullable_xstrdup(old->agent_device);
new->control_ip = nullable_xstrdup(old->control_ip);
return new;
}
static void
ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
{
if (options) {
sset_destroy(&options->targets);
free(options->agent_device);
free(options->control_ip);
free(options);
}
}
/* sFlow library callback to allocate memory. */
static void *
sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
size_t bytes)
{
return xzalloc(bytes);
}
/* sFlow library callback to free memory. */
static int
sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
void *obj)
{
free(obj);
return 0;
}
/* sFlow library callback to report error. */
static void
sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
char *msg)
{
VLOG_WARN("sFlow agent error: %s", msg);
}
/* sFlow library callback to send datagram. */
static void
sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
uint32_t pktLen)
{
struct dpif_sflow *ds = ds_;
collectors_send(ds->collectors, pkt, pktLen);
}
static struct dpif_sflow_port *
dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
OVS_REQUIRES(mutex)
{
struct dpif_sflow_port *dsp;
HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port),
&ds->ports) {
if (dsp->odp_port == odp_port) {
return dsp;
}
}
return NULL;
}
/* Call to get the datapath stats. Modeled after the dpctl utility.
*
* It might be more efficient for this module to be given a handle it can use
* to get these stats more efficiently, but this is only going to be called
* once every 20-30 seconds. Return number of datapaths found (normally expect
* 1). */
static int
sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
struct dpif_dp_stats *dp_totals)
{
struct sset types;
const char *type;
int count = 0;
memset(dp_totals, 0, sizeof *dp_totals);
sset_init(&types);
dp_enumerate_types(&types);
SSET_FOR_EACH (type, &types) {
struct sset names;
const char *name;
sset_init(&names);
if (dp_enumerate_names(type, &names) == 0) {
SSET_FOR_EACH (name, &names) {
struct dpif *dpif;
if (dpif_open(name, type, &dpif) == 0) {
struct dpif_dp_stats dp_stats;
if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
count++;
dp_totals->n_hit += dp_stats.n_hit;
dp_totals->n_missed += dp_stats.n_missed;
dp_totals->n_lost += dp_stats.n_lost;
dp_totals->n_flows += dp_stats.n_flows;
dp_totals->n_mask_hit += dp_stats.n_mask_hit;
dp_totals->n_masks += dp_stats.n_masks;
}
dpif_close(dpif);
}
}
sset_destroy(&names);
}
}
sset_destroy(&types);
return count;
}
/* If there are multiple bridges defined then we need some
minimal artibration to decide which one should send the
global counters. This function allows each sub-agent to
ask if he should do it or not. */
static bool
sflow_global_counters_subid_test(uint32_t subid)
OVS_REQUIRES(mutex)
{
if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
/* The role is up for grabs. */
sflow_global_counters_subid = subid;
}
return (sflow_global_counters_subid == subid);
}
static void
sflow_global_counters_subid_clear(uint32_t subid)
OVS_REQUIRES(mutex)
{
if (sflow_global_counters_subid == subid) {
/* The sub-agent that was sending global counters
is going away, so reset to allow another
to take over. */
sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
}
}
static void
sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
OVS_REQUIRES(mutex)
{
struct dpif_sflow *ds = ds_;
SFLCounters_sample_element dp_elem, res_elem;
struct dpif_dp_stats dp_totals;
struct rusage usage;
if (!sflow_global_counters_subid_test(poller->agent->subId)) {
/* Another sub-agent is currently responsible for this. */
return;
}
/* datapath stats */
if (sflow_get_dp_stats(ds, &dp_totals)) {
dp_elem.tag = SFLCOUNTERS_OVSDP;
dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
SFLADD_ELEMENT(cs, &dp_elem);
}
/* resource usage */
getrusage(RUSAGE_SELF, &usage);
res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
res_elem.counterBlock.appResources.user_time
= timeval_to_msec(&usage.ru_utime);
res_elem.counterBlock.appResources.system_time
= timeval_to_msec(&usage.ru_stime);
res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
SFLADD_ELEMENT(cs, &res_elem);
sfl_poller_writeCountersSample(poller, cs);
}
static void
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
SFL_COUNTERS_SAMPLE_TYPE *cs)
OVS_REQUIRES(mutex)
{
struct dpif_sflow *ds = ds_;
SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
SFLCounters_sample_element eth_elem;
enum netdev_features current;
struct dpif_sflow_port *dsp;
SFLIf_counters *counters;
SFLEthernet_counters* eth_counters;
struct netdev_stats stats;
enum netdev_flags flags;
struct lacp_slave_stats lacp_stats;
const char *ifName;
dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
if (!dsp) {
return;
}
elem.tag = SFLCOUNTERS_GENERIC;
counters = &elem.counterBlock.generic;
counters->ifIndex = SFL_DS_INDEX(poller->dsi);
counters->ifType = 6;
if (!netdev_get_features(dsp->ofport->netdev, &current, NULL, NULL, NULL)) {
/* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
counters->ifSpeed = netdev_features_to_bps(current, 0);
counters->ifDirection = (netdev_features_is_full_duplex(current)
? 1 : 2);
} else {
counters->ifSpeed = 100000000;
counters->ifDirection = 0;
}
if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
counters->ifStatus = 1; /* ifAdminStatus up. */
if (netdev_get_carrier(dsp->ofport->netdev)) {
counters->ifStatus |= 2; /* ifOperStatus us. */
}
} else {
counters->ifStatus = 0; /* Down. */
}
/* XXX
1. Is the multicast counter filled in?
2. Does the multicast counter include broadcasts?
3. Does the rx_packets counter include multicasts/broadcasts?
*/
ofproto_port_get_stats(dsp->ofport, &stats);
counters->ifInOctets = stats.rx_bytes;
counters->ifInUcastPkts = stats.rx_packets;
counters->ifInMulticastPkts = stats.multicast;
counters->ifInBroadcastPkts = stats.rx_broadcast_packets;
counters->ifInDiscards = stats.rx_dropped;
counters->ifInErrors = stats.rx_errors;
counters->ifInUnknownProtos = -1;
counters->ifOutOctets = stats.tx_bytes;
counters->ifOutUcastPkts = stats.tx_packets;
counters->ifOutMulticastPkts = stats.tx_multicast_packets;
counters->ifOutBroadcastPkts = stats.tx_broadcast_packets;
counters->ifOutDiscards = stats.tx_dropped;
counters->ifOutErrors = stats.tx_errors;
counters->ifPromiscuousMode = 0;
SFLADD_ELEMENT(cs, &elem);
/* Include LACP counters and identifiers if this port is part of a LAG. */
if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
memset(&lacp_elem, 0, sizeof lacp_elem);
lacp_elem.tag = SFLCOUNTERS_LACP;
lacp_elem.counterBlock.lacp.actorSystemID =
lacp_stats.dot3adAggPortActorSystemID;
lacp_elem.counterBlock.lacp.partnerSystemID =
lacp_stats.dot3adAggPortPartnerOperSystemID;
lacp_elem.counterBlock.lacp.attachedAggID =
lacp_stats.dot3adAggPortAttachedAggID;
lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
lacp_stats.dot3adAggPortActorAdminState;
lacp_elem.counterBlock.lacp.portState.v.actorOper =
lacp_stats.dot3adAggPortActorOperState;
lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
lacp_stats.dot3adAggPortPartnerAdminState;
lacp_elem.counterBlock.lacp.portState.v.partnerOper =
lacp_stats.dot3adAggPortPartnerOperState;
lacp_elem.counterBlock.lacp.LACPDUsRx =
lacp_stats.dot3adAggPortStatsLACPDUsRx;
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
lacp_elem.counterBlock.lacp.illegalRx =
lacp_stats.dot3adAggPortStatsIllegalRx;
lacp_elem.counterBlock.lacp.LACPDUsTx =
lacp_stats.dot3adAggPortStatsLACPDUsTx;
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
SFLADD_ELEMENT(cs, &lacp_elem);
}
/* Include Port name. */
if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
memset(&name_elem, 0, sizeof name_elem);
name_elem.tag = SFLCOUNTERS_PORTNAME;
name_elem.counterBlock.portName.portName.str = (char *)ifName;
name_elem.counterBlock.portName.portName.len = strlen(ifName);
SFLADD_ELEMENT(cs, &name_elem);
}
/* Include OpenFlow DPID and openflow port number. */
memset(&of_elem, 0, sizeof of_elem);
of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
of_elem.counterBlock.ofPort.datapath_id =
ofproto_get_datapath_id(dsp->ofport->ofproto);
of_elem.counterBlock.ofPort.port_no =
(OVS_FORCE uint32_t)dsp->ofport->ofp_port;
SFLADD_ELEMENT(cs, &of_elem);
/* Include ethernet counters */
memset(&eth_elem, 0, sizeof eth_elem);
eth_elem.tag = SFLCOUNTERS_ETHERNET;
eth_counters = &eth_elem.counterBlock.ethernet;
eth_counters->dot3StatsAlignmentErrors = stats.rx_frame_errors;
eth_counters->dot3StatsFCSErrors = stats.rx_crc_errors;
eth_counters->dot3StatsFrameTooLongs = stats.rx_oversize_errors;
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSingleCollisionFrames);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsMultipleCollisionFrames);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSQETestErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsDeferredTransmissions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsLateCollisions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsExcessiveCollisions);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacTransmitErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsCarrierSenseErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsInternalMacReceiveErrors);
SFL_UNDEF_COUNTER(eth_counters->dot3StatsSymbolErrors);
SFLADD_ELEMENT(cs, &eth_elem);
sfl_poller_writeCountersSample(poller, cs);
}
/* Obtains an address to use for the local sFlow agent and stores it into
* '*agent_addr'. Returns true if successful, false on failure.
*
* The sFlow agent address should be a local IP address that is persistent and
* reachable over the network, if possible. The IP address associated with
* 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
* address used to talk to the controller. If the agent device is not
* specified then it is figured out by taking a look at the routing table based
* on 'targets'. */
static bool
sflow_choose_agent_address(const char *agent_device,
const struct sset *targets,
const char *control_ip,
SFLAddress *agent_addr)
{
struct in6_addr ip;
if (agent_device) {
/* If 'agent_device' is the name of a network device, use its IP
* address. */
if (!netdev_get_ip_by_name(agent_device, &ip)) {
goto success;
}
/* If 'agent_device' is itself an IP address, use it. */
struct sockaddr_storage ss;
if (inet_parse_address(agent_device, &ss)) {
ip = ss_get_address(&ss);
goto success;
}
}
/* Otherwise, use an appropriate local IP address for one of the
* collectors' remote IP addresses. */
const char *target;
SSET_FOR_EACH (target, targets) {
struct sockaddr_storage ss;
if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) {
/* sFlow only supports target in default routing table with
* packet mark zero.
*/
struct in6_addr target_ip = ss_get_address(&ss);
struct in6_addr gw, src = in6addr_any;
char name[IFNAMSIZ];
if (ovs_router_lookup(0, &target_ip, name, &src, &gw)) {
ip = src;
goto success;
}
}
}
struct sockaddr_storage ss;
if (control_ip && inet_parse_address(control_ip, &ss)) {
ip = ss_get_address(&ss);
goto success;
}
VLOG_ERR("could not determine IP address for sFlow agent");
return false;
success:
memset(agent_addr, 0, sizeof *agent_addr);
if (IN6_IS_ADDR_V4MAPPED(&ip)) {
agent_addr->type = SFLADDRESSTYPE_IP_V4;
agent_addr->address.ip_v4.addr
= (OVS_FORCE uint32_t) in6_addr_get_mapped_ipv4(&ip);
} else {
agent_addr->type = SFLADDRESSTYPE_IP_V6;
memcpy(agent_addr->address.ip_v6.addr, ip.s6_addr,
sizeof agent_addr->address.ip_v6.addr);
}
return true;
}
static void
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
{
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
free(ds->sflow_agent);
ds->sflow_agent = NULL;
}
collectors_destroy(ds->collectors);
ds->collectors = NULL;
ofproto_sflow_options_destroy(ds->options);
ds->options = NULL;
/* Turn off sampling to save CPU cycles. */
ds->probability = 0;
}
void
dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
dpif_sflow_clear__(ds);
ovs_mutex_unlock(&mutex);
}
bool
dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
bool enabled;
ovs_mutex_lock(&mutex);
enabled = ds->collectors != NULL;
ovs_mutex_unlock(&mutex);
return enabled;
}
struct dpif_sflow *
dpif_sflow_create(void)
{
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
struct dpif_sflow *ds;
if (ovsthread_once_start(&once)) {
ovs_mutex_init_recursive(&mutex);
ovsthread_once_done(&once);
}
ds = xcalloc(1, sizeof *ds);
ds->next_tick = time_now() + 1;
hmap_init(&ds->ports);
ds->probability = 0;
ovs_refcount_init(&ds->ref_cnt);
return ds;
}
struct dpif_sflow *
dpif_sflow_ref(const struct dpif_sflow *ds_)
{
struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
if (ds) {
ovs_refcount_ref(&ds->ref_cnt);
}
return ds;
}
/* 32-bit fraction of packets to sample with. A value of 0 samples no packets,
* a value of %UINT32_MAX samples all packets and intermediate values sample
* intermediate fractions of packets. */
uint32_t
dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
uint32_t probability;
ovs_mutex_lock(&mutex);
probability = ds->probability;
ovs_mutex_unlock(&mutex);
return probability;
}
void
dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
struct dpif_sflow_port *dsp, *next;
dpif_sflow_clear(ds);
HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) {
dpif_sflow_del_port__(ds, dsp);
}
hmap_destroy(&ds->ports);
free(ds);
}
}
static void
dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
OVS_REQUIRES(mutex)
{
SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
sflow_agent_get_counters);
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
static enum dpif_sflow_tunnel_type
dpif_sflow_tunnel_type(struct ofport *ofport) {
const char *type = netdev_get_type(ofport->netdev);
if (type) {
if (strcmp(type, "gre") == 0) {
return DPIF_SFLOW_TUNNEL_GRE;
} else if (strcmp(type, "vxlan") == 0) {
return DPIF_SFLOW_TUNNEL_VXLAN;
} else if (strcmp(type, "lisp") == 0) {
return DPIF_SFLOW_TUNNEL_LISP;
} else if (strcmp(type, "geneve") == 0) {
return DPIF_SFLOW_TUNNEL_GENEVE;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
return DPIF_SFLOW_TUNNEL_UNKNOWN;
}
static uint8_t
dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type)
{
/* Default to 0 (IPPROTO_IP), meaning "unknown". */
uint8_t ipproto = 0;
switch(tunnel_type) {
case DPIF_SFLOW_TUNNEL_GRE:
ipproto = IPPROTO_GRE;
break;
case DPIF_SFLOW_TUNNEL_VXLAN:
case DPIF_SFLOW_TUNNEL_LISP:
case DPIF_SFLOW_TUNNEL_GENEVE:
ipproto = IPPROTO_UDP;
case DPIF_SFLOW_TUNNEL_UNKNOWN:
break;
}
return ipproto;
}
void
dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
int ifindex;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
enum dpif_sflow_tunnel_type tunnel_type;
ovs_mutex_lock(&mutex);
dpif_sflow_del_port(ds, odp_port);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
tunnel_type = dpif_sflow_tunnel_type(ofport);
ifindex = netdev_get_ifindex(ofport->netdev);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ifindex <= 0
&& tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Not an ifindex port, and not a tunnel port either
* so do not add a cross-reference to it here.
*/
goto out;
}
/* Add to table of ports. */
dsp = xmalloc(sizeof *dsp);
dsp->ofport = ofport;
dsp->odp_port = odp_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
dsp->tunnel_type = tunnel_type;
hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ifindex > 0) {
/* Add poller for ports that have ifindex. */
SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
if (ds->sflow_agent) {
dpif_sflow_add_poller(ds, dsp);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
} else {
/* Record "ifindex unknown" for the others */
SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
}
out:
ovs_mutex_unlock(&mutex);
}
static void
dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
OVS_REQUIRES(mutex)
{
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (ds->sflow_agent
&& SFL_DS_INDEX(dsp->dsi)) {
sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
}
hmap_remove(&ds->ports, &dsp->hmap_node);
free(dsp);
}
void
dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
ovs_mutex_lock(&mutex);
dsp = dpif_sflow_find_port(ds, odp_port);
if (dsp) {
dpif_sflow_del_port__(ds, dsp);
}
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_set_options(struct dpif_sflow *ds,
const struct ofproto_sflow_options *options)
OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
bool options_changed;
SFLReceiver *receiver;
SFLAddress agentIP;
time_t now;
SFLDataSource_instance dsi;
uint32_t dsIndex;
SFLSampler *sampler;
SFLPoller *poller;
ovs_mutex_lock(&mutex);
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
/* No point in doing any work if there are no targets or nothing to
* sample. */
dpif_sflow_clear__(ds);
goto out;
}
options_changed = (!ds->options
|| !ofproto_sflow_options_equal(options, ds->options));
/* Configure collectors if options have changed or if we're shortchanged in
* collectors (which indicates that opening one or more of the configured
* collectors failed, so that we should retry). */
if (options_changed
|| collectors_count(ds->collectors) < sset_count(&options->targets)) {
collectors_destroy(ds->collectors);
collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
&ds->collectors);
if (ds->collectors == NULL) {
VLOG_WARN_RL(&rl, "no collectors could be initialized, "
"sFlow disabled");
dpif_sflow_clear__(ds);
goto out;
}
}
/* Choose agent IP address and agent device (if not yet setup) */
if (!sflow_choose_agent_address(options->agent_device,
&options->targets,
options->control_ip, &agentIP)) {
dpif_sflow_clear__(ds);
goto out;
}
/* Avoid reconfiguring if options didn't change. */
if (!options_changed) {
goto out;
}
ofproto_sflow_options_destroy(ds->options);
ds->options = ofproto_sflow_options_clone(options);
/* Create agent. */
VLOG_INFO("creating sFlow agent %d", options->sub_id);
if (ds->sflow_agent) {
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
sfl_agent_release(ds->sflow_agent);
}
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
now = time_wall();
sfl_agent_init(ds->sflow_agent,
&agentIP,
options->sub_id,
now, /* Boot time. */
now, /* Current time. */
ds, /* Pointer supplied to callbacks. */
sflow_agent_alloc_cb,
sflow_agent_free_cb,
sflow_agent_error_cb,
sflow_agent_send_packet_cb);
receiver = sfl_agent_addReceiver(ds->sflow_agent);
sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
/* Set the sampling_rate down in the datapath. */
ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);
/* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
because it is associated with the hypervisor, and interacts with the server
hardware directly. The sub_id is used to distinguish this sampler from
others on other bridges within the same agent. */
dsIndex = 1000 + options->sub_id;
SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate);
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
/* Add a counter poller for the bridge so we can use it to send
global counters such as datapath cache hit/miss stats. */
poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
sflow_agent_get_global_counters);
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
/* Add pollers for the currently known ifindex-ports */
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
if (SFL_DS_INDEX(dsp->dsi)) {
dpif_sflow_add_poller(ds, dsp);
}
}
out:
ovs_mutex_unlock(&mutex);
}
int
dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
struct dpif_sflow_port *dsp;
int ret;
ovs_mutex_lock(&mutex);
dsp = dpif_sflow_find_port(ds, odp_port);
ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0;
ovs_mutex_unlock(&mutex);
return ret;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
static void
dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
const struct flow_tnl *tunnel,
SFLSampled_ipv4 *ipv4)
{
ipv4->protocol = tunnel_ipproto;
ipv4->tos = tunnel->ip_tos;
ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
}
static void
dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
ovs_be32 lse)
{
if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Record the new lse in host-byte-order. */
/* BOS flag will be fixed later when we send stack to sFlow library. */
sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
}
static void
dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
{
if (sflow_actions->mpls_stack_depth == 0) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
sflow_actions->mpls_stack_depth--;
}
static void
dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
const struct ovs_key_mpls *mpls_key, int n)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
int ii;
if (n > FLOW_MAX_MPLS_LABELS) {
sflow_actions->mpls_err = true;
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
for (ii = 0; ii < n; ii++) {
/* Reverse stack order, and use host-byte-order for each lse. */
sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
sflow_actions->mpls_stack_depth = n;
}
static void
sflow_read_tnl_push_action(const struct nlattr *attr,
struct dpif_sflow_actions *sflow_actions)
{
/* Modeled on lib/odp-util.c: format_odp_tnl_push_header */
const struct ovs_action_push_tnl *data = nl_attr_get(attr);
const struct eth_header *eth = (const struct eth_header *) data->header;
const struct ip_header *ip
= ALIGNED_CAST(const struct ip_header *, eth + 1);
sflow_actions->out_port = data->out_port;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Ethernet. */
/* TODO: SFlow does not currently define a MAC-in-MAC
* encapsulation structure. We could use an extension
* structure to report this.
*/
/* IPv4 */
/* Cannot assume alignment so just use memcpy. */
sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
sflow_actions->tunnel.ip_tos = ip->ip_tos;
sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
/* The tnl_push action can supply the ip_protocol too. */
sflow_actions->tunnel_ipproto = ip->ip_proto;
/* Layer 4 */
if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
|| data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
const struct udp_header *udp = (const struct udp_header *) (ip + 1);
sflow_actions->tunnel.tp_src = udp->udp_src;
sflow_actions->tunnel.tp_dst = udp->udp_dst;
if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
sflow_actions->tunnel.tun_id = htonll(tun_id);
} else {
const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
sflow_actions->tunnel.tun_id = htonll(tun_id);
}
} else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
const void *l4 = ip + 1;
const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
if (greh->flags & htons(GRE_CSUM)) {
options++;
}
if (greh->flags & htons(GRE_KEY)) {
uint64_t tun_id = ntohl(get_16aligned_be32(options));
sflow_actions->tunnel.tun_id = htonll(tun_id);
}
}
}
static void
sflow_read_set_action(const struct nlattr *attr,
struct dpif_sflow_actions *sflow_actions)
{
enum ovs_key_attr type = nl_attr_type(attr);
switch (type) {
case OVS_KEY_ATTR_ENCAP:
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
dpif_sflow_read_actions(NULL,
nl_attr_get(attr), nl_attr_get_size(attr),
sflow_actions, true);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
break;
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_DP_HASH:
case OVS_KEY_ATTR_RECIRC_ID:
break;
case OVS_KEY_ATTR_TUNNEL: {
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
tun-metadata: Manage tunnel TLV mapping table on a per-bridge basis. When using tunnel TLVs (at the moment, this means Geneve options), a controller must first map the class and type onto an appropriate OXM field so that it can be used in OVS flow operations. This table is managed using OpenFlow extensions. The original code that added support for TLVs made the mapping table global as a simplification. However, this is not really logically correct as the OpenFlow management commands are operating on a per-bridge basis. This removes the original limitation to make the table per-bridge. One nice result of this change is that it is generally clearer whether the tunnel metadata is in datapath or OpenFlow format. Rather than allowing ad-hoc format changes and trying to handle both formats in the tunnel metadata functions, the format is more clearly separated by function. Datapaths (both kernel and userspace) use datapath format and it is not changed during the upcall process. At the beginning of action translation, tunnel metadata is converted to OpenFlow format and flows and wildcards are translated back at the end of the process. As an additional benefit, this change improves performance in some flow setup situations by keeping the tunnel metadata in the original packet format in more cases. This helps when copies need to be made as the amount of data touched is only what is present in the packet rather than the maximum amount of metadata supported. Co-authored-by: Madhu Challa <challa@noironetworks.com> Signed-off-by: Madhu Challa <challa@noironetworks.com> Signed-off-by: Jesse Gross <jesse@kernel.org> Acked-by: Ben Pfaff <blp@ovn.org>
2016-04-19 18:36:04 -07:00
if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
== ODP_FIT_ERROR) {
/* Tunnel parsing error. */
sflow_actions->tunnel_err = true;
}
}
break;
}
case OVS_KEY_ATTR_IN_PORT:
case OVS_KEY_ATTR_ETHERNET:
case OVS_KEY_ATTR_VLAN:
break;
case OVS_KEY_ATTR_MPLS: {
const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
size_t size = nl_attr_get_size(attr);
dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
break;
}
case OVS_KEY_ATTR_ETHERTYPE:
case OVS_KEY_ATTR_IPV4:
if (sflow_actions->encap_depth == 1) {
const struct ovs_key_ipv4 *key = nl_attr_get(attr);
if (key->ipv4_src) {
sflow_actions->tunnel.ip_src = key->ipv4_src;
}
if (key->ipv4_dst) {
sflow_actions->tunnel.ip_dst = key->ipv4_dst;
}
if (key->ipv4_proto) {
sflow_actions->tunnel_ipproto = key->ipv4_proto;
}
if (key->ipv4_tos) {
sflow_actions->tunnel.ip_tos = key->ipv4_tos;
}
if (key->ipv4_ttl) {
sflow_actions->tunnel.ip_tos = key->ipv4_ttl;
}
}
break;
case OVS_KEY_ATTR_IPV6:
/* TODO: parse IPv6 encap. */
break;
/* These have the same structure and format. */
case OVS_KEY_ATTR_TCP:
case OVS_KEY_ATTR_UDP:
case OVS_KEY_ATTR_SCTP:
if (sflow_actions->encap_depth == 1) {
const struct ovs_key_tcp *key = nl_attr_get(attr);
if (key->tcp_src) {
sflow_actions->tunnel.tp_src = key->tcp_src;
}
if (key->tcp_dst) {
sflow_actions->tunnel.tp_dst = key->tcp_dst;
}
}
break;
case OVS_KEY_ATTR_TCP_FLAGS:
case OVS_KEY_ATTR_ICMP:
case OVS_KEY_ATTR_ICMPV6:
case OVS_KEY_ATTR_ARP:
case OVS_KEY_ATTR_ND:
case OVS_KEY_ATTR_ND_EXTENSIONS:
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
case OVS_KEY_ATTR_CT_STATE:
case OVS_KEY_ATTR_CT_ZONE:
case OVS_KEY_ATTR_CT_MARK:
case OVS_KEY_ATTR_CT_LABELS:
datapath: Add original direction conntrack tuple to sw_flow_key. Upstream commit: commit 9dd7f8907c3705dc7a7a375d1c6e30b06e6daffc Author: Jarno Rajahalme <jarno@ovn.org> Date: Thu Feb 9 11:21:59 2017 -0800 openvswitch: Add original direction conntrack tuple to sw_flow_key. Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP), have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections reply packets (as well as original direction packets), and both direction packets of any related connections may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. While existence of a conntrack entry is required for admission of the return or related packets, policy changes can render connections that were initially admitted to be rejected or dropped afterwards. If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using the original direction 5-tuple matching the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same or different than that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in union with ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND being an ICMPv6 protocol requires a bit more attention. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> This patch squashes in minimal amount of OVS userspace code to not break the build. Later patches contain the full userspace support. Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org>
2017-03-08 17:18:22 -08:00
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
case OVS_KEY_ATTR_UNSPEC:
userspace: Switching of L3 packets in L2 pipeline Ports have a new layer3 attribute if they send/receive L3 packets. The packet_type included in structs dp_packet and flow is considered in ofproto-dpif. The classical L2 match fields (dl_src, dl_dst, dl_type, and vlan_tci, vlan_vid, vlan_pcp) now have Ethernet as pre-requisite. A dummy ethernet header is pushed to L3 packets received from L3 ports before the the pipeline processing starts. The ethernet header is popped before sending a packet to a L3 port. For datapath ports that can receive L2 or L3 packets, the packet_type becomes part of the flow key for datapath flows and is handled appropriately in dpif-netdev. In the 'else' branch in flow_put_on_pmd() function, the additional check flow_equal(&match.flow, &netdev_flow->flow) was removed, as a) the dpcls lookup is sufficient to uniquely identify a flow and b) it caused false negatives because the flow in netdev->flow may not properly masked. In dpif_netdev_flow_put() we now use the same method for constructing the netdev_flow_key as the one used when adding the flow to the dplcs to make sure these always match. The function netdev_flow_key_from_flow() used so far was not only inefficient but sometimes caused mismatches and subsequent flow update failures. The kernel datapath does not support the packet_type match field. Instead it encodes the packet type implictly by the presence or absence of the Ethernet attribute in the flow key and mask. This patch filters the PACKET_TYPE attribute out of netlink flow key and mask to be sent to the kernel datapath. Signed-off-by: Lorand Jakab <lojakab@cisco.com> Signed-off-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: Jiri Benc <jbenc@redhat.com> Signed-off-by: Yi Yang <yi.y.yang@intel.com> Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com> Co-authored-by: Zoltan Balogh <zoltan.balogh@ericsson.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2017-06-02 16:16:17 +00:00
case OVS_KEY_ATTR_PACKET_TYPE:
case OVS_KEY_ATTR_NSH:
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
case __OVS_KEY_ATTR_MAX:
default:
break;
}
}
static void
dpif_sflow_capture_input_mpls(const struct flow *flow,
struct dpif_sflow_actions *sflow_actions)
{
if (eth_type_mpls(flow->dl_type)) {
int depth = 0;
int ii;
ovs_be32 lse;
/* Calculate depth by detecting BOS. */
for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
lse = flow->mpls_lse[ii];
depth++;
if (lse & htonl(MPLS_BOS_MASK)) {
break;
}
}
/* Capture stack, reversing stack order, and
* using host-byte-order for each lse. BOS flag
* is ignored for now. It is set later when
* the output stack is encoded.
*/
for (ii = 0; ii < depth; ii++) {
lse = flow->mpls_lse[ii];
sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
}
sflow_actions->mpls_stack_depth = depth;
}
}
void
dpif_sflow_read_actions(const struct flow *flow,
const struct nlattr *actions, size_t actions_len,
struct dpif_sflow_actions *sflow_actions,
bool capture_mpls)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
const struct nlattr *a;
unsigned int left;
if (actions_len == 0) {
/* Packet dropped.*/
return;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
if (flow != NULL && capture_mpls == true) {
/* Make sure the MPLS output stack
* is seeded with the input stack.
*/
dpif_sflow_capture_input_mpls(flow, sflow_actions);
/* XXX when 802.1AD(QinQ) is supported then
* we can do the same with VLAN stacks here
*/
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
enum ovs_action_attr type = nl_attr_type(a);
switch (type) {
case OVS_ACTION_ATTR_OUTPUT:
/* Capture the output port in case we need it
* to get the output tunnel type.
*/
sflow_actions->out_port = nl_attr_get_odp_port(a);
break;
case OVS_ACTION_ATTR_TUNNEL_POP:
/* XXX: Do not handle this for now. It's not clear
* if we should start with encap_depth == 1 when we
* see an input tunnel, or if we should assume
* that the input tunnel was always "popped" if it
* was presented to us decoded in flow->tunnel?
*
* If we do handle this it might look like this,
* as we clear the captured tunnel info and decrement
* the encap_depth:
*
* memset(&sflow_actions->tunnel, 0, sizeof struct flow_tnl);
* sflow_actions->tunnel_ipproto = 0;
* --sflow_actions->encap_depth;
*
* but for now just disable the tunnel annotation:
*/
sflow_actions->tunnel_err = true;
break;
case OVS_ACTION_ATTR_TUNNEL_PUSH:
/* XXX: This actions appears to come with it's own
* OUTPUT action, so should it be regarded as having
* an implicit "pop" following it too? Put another
* way, would two tnl_push() actions in succession
* result in a packet with two layers of encap?
*/
if (++sflow_actions->encap_depth > 1) {
/* Do not handle multi-encap for now. */
sflow_actions->tunnel_err = true;
} else {
sflow_read_tnl_push_action(a, sflow_actions);
}
break;
case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_HASH:
Add support for connection tracking. This patch adds a new action and fields to OVS that allow connection tracking to be performed. This support works in conjunction with the Linux kernel support merged into the Linux-4.3 development cycle. Packets have two possible states with respect to connection tracking: Untracked packets have not previously passed through the connection tracker, while tracked packets have previously been through the connection tracker. For OpenFlow pipeline processing, untracked packets can become tracked, and they will remain tracked until the end of the pipeline. Tracked packets cannot become untracked. Connections can be unknown, uncommitted, or committed. Packets which are untracked have unknown connection state. To know the connection state, the packet must become tracked. Uncommitted connections have no connection state stored about them, so it is only possible for the connection tracker to identify whether they are a new connection or whether they are invalid. Committed connections have connection state stored beyond the lifetime of the packet, which allows later packets in the same connection to be identified as part of the same established connection, or related to an existing connection - for instance ICMP error responses. The new 'ct' action transitions the packet from "untracked" to "tracked" by sending this flow through the connection tracker. The following parameters are supported initally: - "commit": When commit is executed, the connection moves from uncommitted state to committed state. This signals that information about the connection should be stored beyond the lifetime of the packet within the pipeline. This allows future packets in the same connection to be recognized as part of the same "established" (est) connection, as well as identifying packets in the reply (rpl) direction, or packets related to an existing connection (rel). - "zone=[u16|NXM]": Perform connection tracking in the zone specified. Each zone is an independent connection tracking context. When the "commit" parameter is used, the connection will only be committed in the specified zone, and not in other zones. This is 0 by default. - "table=NUMBER": Fork pipeline processing in two. The original instance of the packet will continue processing the current actions list as an untracked packet. An additional instance of the packet will be sent to the connection tracker, which will be re-injected into the OpenFlow pipeline to resume processing in the specified table, with the ct_state and other ct match fields set. If the table is not specified, then the packet is submitted to the connection tracker, but the pipeline does not fork and the ct match fields are not populated. It is strongly recommended to specify a table later than the current table to prevent loops. When the "table" option is used, the packet that continues processing in the specified table will have the ct_state populated. The ct_state may have any of the following flags set: - Tracked (trk): Connection tracking has occurred. - Reply (rpl): The flow is in the reply direction. - Invalid (inv): The connection tracker couldn't identify the connection. - New (new): This is the beginning of a new connection. - Established (est): This is part of an already existing connection. - Related (rel): This connection is related to an existing connection. For more information, consult the ovs-ofctl(8) man pages. Below is a simple example flow table to allow outbound TCP traffic from port 1 and drop traffic from port 2 that was not initiated by port 1: table=0,priority=1,action=drop table=0,arp,action=normal table=0,in_port=1,tcp,ct_state=-trk,action=ct(commit,zone=9),2 table=0,in_port=2,tcp,ct_state=-trk,action=ct(zone=9,table=1) table=1,in_port=2,ct_state=+trk+est,tcp,action=1 table=1,in_port=2,ct_state=+trk+new,tcp,action=drop Based on original design by Justin Pettit, contributions from Thomas Graf and Daniele Di Proietto. Signed-off-by: Joe Stringer <joestringer@nicira.com> Acked-by: Jarno Rajahalme <jrajahalme@nicira.com> Acked-by: Ben Pfaff <blp@nicira.com>
2015-08-11 10:56:09 -07:00
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_METER:
break;
case OVS_ACTION_ATTR_SET_MASKED:
/* TODO: apply mask. XXX: Are we likely to see this? */
break;
case OVS_ACTION_ATTR_SET:
sflow_read_set_action(nl_attr_get(a), sflow_actions);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
case OVS_ACTION_ATTR_POP_VLAN:
/* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
* construct a VLAN-stack. The sFlow user-action cookie already
* captures the egress VLAN ID so there is nothing more to do here.
*/
break;
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
if (mpls) {
dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
}
break;
}
case OVS_ACTION_ATTR_POP_MPLS: {
dpif_sflow_pop_mpls_lse(sflow_actions);
break;
}
case OVS_ACTION_ATTR_PUSH_ETH:
case OVS_ACTION_ATTR_POP_ETH:
/* TODO: SFlow does not currently define a MAC-in-MAC
* encapsulation structure. We could use an extension
* structure to report this.
*/
break;
case OVS_ACTION_ATTR_CLONE:
if (flow != NULL) {
dpif_sflow_read_actions(flow, nl_attr_get(a), nl_attr_get_size(a),
sflow_actions, false);
}
break;
case OVS_ACTION_ATTR_SAMPLE:
case OVS_ACTION_ATTR_PUSH_NSH:
case OVS_ACTION_ATTR_POP_NSH:
case OVS_ACTION_ATTR_UNSPEC:
case __OVS_ACTION_ATTR_MAX:
default:
break;
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
}
static void
dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
uint32_t *mpls_lse_buf,
const struct dpif_sflow_actions *sflow_actions)
{
/* Put the MPLS stack back into "packet header" order,
* and make sure the BOS flag is set correctly on the last
* one. Each lse is still in host-byte-order.
*/
int ii;
uint32_t lse;
stack->depth = sflow_actions->mpls_stack_depth;
stack->stack = mpls_lse_buf;
for (ii = 0; ii < stack->depth; ii++) {
lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
}
stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
}
/* Extract the output port count from the user action cookie.
* See http://sflow.org/sflow_version_5.txt "Input/Output port information"
*/
static uint32_t
dpif_sflow_cookie_num_outputs(const struct user_action_cookie *cookie)
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
{
uint32_t format = cookie->sflow.output & 0xC0000000;
uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
if (format == 0) {
return port_n ? 1 : 0;
}
else if (format == 0x80000000) {
return port_n;
}
return 0;
}
void
dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
const struct flow *flow, odp_port_t odp_in_port,
const struct user_action_cookie *cookie,
const struct dpif_sflow_actions *sflow_actions)
OVS_EXCLUDED(mutex)
{
SFL_FLOW_SAMPLE_TYPE fs;
SFLFlow_sample_element hdrElem;
SFLSampled_header *header;
SFLFlow_sample_element switchElem;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
uint8_t tnlInProto, tnlOutProto;
SFLFlow_sample_element tnlInElem, tnlOutElem;
SFLFlow_sample_element vniInElem, vniOutElem;
SFLFlow_sample_element mplsElem;
uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
sflow: Fix sFlow sampling structure. According to Neil McKee, in an email archived at http://openvswitch.org/pipermail/dev_openvswitch.org/2010-January/000934.html: The containment rule is that a given sflow-datasource (sampler or poller) should be scoped within only one sflow-agent (or sub-agent). So the issue arrises when you have two switches/datapaths defined on the same host being managed with the same IP address: each switch is a separate sub-agent, so they can run independently (e.g. with their own sequence numbers) but they can't both claim to speak for the same sflow-datasource. Specifically, they can't both represent the <ifindex>:0 data-source. This containment rule is necessary so that the sFlow collector can scale and combine the results accurately. One option would be to stick with the <ifindex>:0 data-source but elevate it to be global across all bridges, with a global sample_pool and a global sflow_agent. Not tempting. Better to go the other way and allow each interface to have it's own sampler, just as it already has it's own poller. The ifIndex numbers are globally unique across all switches/datapaths on the host, so the containment is now clean. Datasource <ifindex>:5 might be on one switch, whille <ifindex>:7 can be on another. Other benefits are that 1) you can support the option of overriding the default sampling-rate on an interface-by-interface basis, and 2) this is how most sFlow implementations are coded, so there will be no surprises or interoperability issues with any sFlow collectors out there. This commit implements the approach suggested by Neil. This commit uses an atomic_t to represent the sampling pool. This is because we do want access to it to be atomic, but we expect that it will "mostly" be accessed from a single CPU at a time. Perhaps this is a bad assumption; we can always switch to another form of synchronization later. CC: Neil McKee <neil.mckee@inmon.com>
2010-01-20 13:52:42 -08:00
SFLSampler *sampler;
struct dpif_sflow_port *in_dsp;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
struct dpif_sflow_port *out_dsp;
ovs_be16 vlan_tci;
ovs_mutex_lock(&mutex);
sampler = ds->sflow_agent->samplers;
if (!sampler) {
goto out;
}
/* Build a flow sample. */
memset(&fs, 0, sizeof fs);
/* Look up the input ifIndex if this port has one. Otherwise just
* leave it as 0 (meaning 'unknown') and continue. */
in_dsp = dpif_sflow_find_port(ds, odp_in_port);
if (in_dsp) {
fs.input = SFL_DS_INDEX(in_dsp->dsi);
sflow: Fix sFlow sampling structure. According to Neil McKee, in an email archived at http://openvswitch.org/pipermail/dev_openvswitch.org/2010-January/000934.html: The containment rule is that a given sflow-datasource (sampler or poller) should be scoped within only one sflow-agent (or sub-agent). So the issue arrises when you have two switches/datapaths defined on the same host being managed with the same IP address: each switch is a separate sub-agent, so they can run independently (e.g. with their own sequence numbers) but they can't both claim to speak for the same sflow-datasource. Specifically, they can't both represent the <ifindex>:0 data-source. This containment rule is necessary so that the sFlow collector can scale and combine the results accurately. One option would be to stick with the <ifindex>:0 data-source but elevate it to be global across all bridges, with a global sample_pool and a global sflow_agent. Not tempting. Better to go the other way and allow each interface to have it's own sampler, just as it already has it's own poller. The ifIndex numbers are globally unique across all switches/datapaths on the host, so the containment is now clean. Datasource <ifindex>:5 might be on one switch, whille <ifindex>:7 can be on another. Other benefits are that 1) you can support the option of overriding the default sampling-rate on an interface-by-interface basis, and 2) this is how most sFlow implementations are coded, so there will be no surprises or interoperability issues with any sFlow collectors out there. This commit implements the approach suggested by Neil. This commit uses an atomic_t to represent the sampling pool. This is because we do want access to it to be atomic, but we expect that it will "mostly" be accessed from a single CPU at a time. Perhaps this is a bad assumption; we can always switch to another form of synchronization later. CC: Neil McKee <neil.mckee@inmon.com>
2010-01-20 13:52:42 -08:00
}
/* Make the assumption that the random number generator in the
* datapath converges to the configured mean, and just increment the
* samplePool by the configured sampling rate every time. */
sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);
/* Sampled header. */
memset(&hdrElem, 0, sizeof hdrElem);
hdrElem.tag = SFLFLOW_HEADER;
header = &hdrElem.flowType.header;
header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
/* The frame_length should include the Ethernet FCS (4 bytes),
* but it has already been stripped, so we need to add 4 here. */
header->frame_length = dp_packet_size(packet) + 4;
/* Ethernet FCS stripped off. */
header->stripped = 4;
header->header_length = MIN(dp_packet_size(packet),
datapath: Report kernel's flow key when passing packets up to userspace. One of the goals for Open vSwitch is to decouple kernel and userspace software, so that either one can be upgraded or rolled back independent of the other. To do this in full generality, it must be possible to change the kernel's idea of the flow key separately from the userspace version. This commit takes one step in that direction by making the kernel report its idea of the flow that a packet belongs to whenever it passes a packet up to userspace. This means that userspace can intelligently figure out what to do: - If userspace's notion of the flow for the packet matches the kernel's, then nothing special is necessary. - If the kernel has a more specific notion for the flow than userspace, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary: userspace can still set up the flow in the usual way. - If userspace has a more specific notion for the flow than the kernel, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. (This case is bad from a performance point of view, but at least it is correct.) This commit does not actually make userspace flexible enough to handle changes in the kernel flow key structure, although userspace does now have enough information to do that intelligently. This will have to wait for later commits. This commit is bigger than it would otherwise be because it is rolled together with changing "struct odp_msg" to a sequence of Netlink attributes. The alternative, to do each of those changes in a separate patch, seemed like overkill because it meant that either we would have to introduce and then kill off Netlink attributes for in_port and tun_id, if Netlink conversion went first, or shove yet another variable-length header into the stuff already after odp_msg, if adding the flow key to odp_msg went first. This commit will slow down performance of checksumming packets sent up to userspace. I'm not entirely pleased with how I did it. I considered a couple of alternatives, but none of them seemed that much better. Suggestions welcome. Not changing anything wasn't an option, unfortunately. At any rate some slowdown will become unavoidable when OVS actually starts using Netlink instead of just Netlink framing. (Actually, I thought of one option where we could avoid that: make userspace do the checksum instead, by passing csum_start and csum_offset as part of what goes to userspace. But that's not perfect either.) Signed-off-by: Ben Pfaff <blp@nicira.com> Acked-by: Jesse Gross <jesse@nicira.com>
2011-01-24 14:59:57 -08:00
sampler->sFlowFsMaximumHeaderSize);
header->header_bytes = dp_packet_data(packet);
/* Add extended switch element. */
memset(&switchElem, 0, sizeof(switchElem));
switchElem.tag = SFLFLOW_EX_SWITCH;
switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlans[0].tci);
switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlans[0].tci);
/* Retrieve data from user_action_cookie. */
vlan_tci = cookie->sflow.vlan_tci;
switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);
fs.output = cookie->sflow.output;
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
/* Input tunnel. */
if (flow->tunnel.ip_dst) {
memset(&tnlInElem, 0, sizeof(tnlInElem));
tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
tnlInProto = in_dsp ? dpif_sflow_tunnel_proto(in_dsp->tunnel_type) : 0;
dpif_sflow_tunnel_v4(tnlInProto,
&flow->tunnel,
&tnlInElem.flowType.ipv4);
SFLADD_ELEMENT(&fs, &tnlInElem);
if (flow->tunnel.tun_id) {
memset(&vniInElem, 0, sizeof(vniInElem));
vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
vniInElem.flowType.tunnel_vni.vni
= ntohll(flow->tunnel.tun_id);
SFLADD_ELEMENT(&fs, &vniInElem);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Output tunnel. */
if (sflow_actions
&& sflow_actions->encap_depth == 1
&& !sflow_actions->tunnel_err
&& dpif_sflow_cookie_num_outputs(cookie) == 1) {
tnlOutProto = sflow_actions->tunnel_ipproto;
if (tnlOutProto == 0) {
/* Try to infer the ip-protocol from the output port. */
if (sflow_actions->out_port != ODPP_NONE) {
out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
if (out_dsp) {
tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
}
}
}
memset(&tnlOutElem, 0, sizeof(tnlOutElem));
tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
dpif_sflow_tunnel_v4(tnlOutProto,
&sflow_actions->tunnel,
&tnlOutElem.flowType.ipv4);
SFLADD_ELEMENT(&fs, &tnlOutElem);
if (sflow_actions->tunnel.tun_id) {
memset(&vniOutElem, 0, sizeof(vniOutElem));
vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
vniOutElem.flowType.tunnel_vni.vni
= ntohll(sflow_actions->tunnel.tun_id);
SFLADD_ELEMENT(&fs, &vniOutElem);
}
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* MPLS output label stack. */
if (sflow_actions
&& sflow_actions->mpls_stack_depth > 0
&& !sflow_actions->mpls_err
&& dpif_sflow_cookie_num_outputs(cookie) == 1) {
memset(&mplsElem, 0, sizeof(mplsElem));
mplsElem.tag = SFLFLOW_EX_MPLS;
dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
mpls_lse_buf,
sflow_actions);
SFLADD_ELEMENT(&fs, &mplsElem);
Extend sFlow agent to report tunnel and MPLS structures Packets are still sampled at ingress only, so the egress tunnel and/or MPLS structures are only included when there is just 1 output port. The actions are either provided by the datapath in the sample upcall or looked up in the userspace cache. The former is preferred because it is more reliable and does not present any new demands or constraints on the userspace cache, however the code falls back on the userspace lookup so that this solution can work with existing kernel datapath modules. If the lookup fails it is not critical: the compiled user-action-cookie is still available and provides the essential output port and output VLAN forwarding information just as before. The openvswitch actions can express almost any tunneling/mangling so the only totally faithful representation would be to somehow encode the whole list of flow actions in the sFlow output. However the standard sFlow tunnel structures can express most common real-world scenarios, so in parsing the actions we look for those and skip the encoding if we see anything unusual. For example, a single set(tunnel()) or tnl_push() is interpreted, but if a second such action is encountered then the egress tunnel reporting is suppressed. The sFlow standard allows "best effort" encoding so that if a field is not knowable or too onerous to look up then it can be left out. This is often the case for the layer-4 source port or even the src ip address of a tunnel. The assumption is that monitoring is enabled everywhere so a missing field can typically be seen at ingress to the next switch in the path. This patch also adds unit tests to check the sFlow encoding of set(tunnel()), tnl_push() and push_mpls() actions. The netlink attribute to request that actions be included in the upcall from the datapath is inserted for sFlow sampling only. To make that option be explicit would require further changes to the printing and parsing of actions in lib/odp-util.c, and to scripts in the test suite. Further enhancements to report on 802.1AD QinQ, 64-bit tunnel IDs, and NAT transformations can follow in future patches that make only incremental changes. Signed-off-by: Neil McKee <neil.mckee@inmon.com> [blp@nicira.com made stylistic and semantic changes] Signed-off-by: Ben Pfaff <blp@nicira.com>
2015-07-17 21:37:02 -07:00
}
/* Submit the flow sample to be encoded into the next datagram. */
SFLADD_ELEMENT(&fs, &hdrElem);
SFLADD_ELEMENT(&fs, &switchElem);
sfl_sampler_writeFlowSample(sampler, &fs);
out:
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
if (ds->collectors != NULL) {
time_t now = time_now();
route_table_run();
if (now >= ds->next_tick) {
sfl_agent_tick(ds->sflow_agent, time_wall());
ds->next_tick = now + 1;
}
}
ovs_mutex_unlock(&mutex);
}
void
dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
ovs_mutex_lock(&mutex);
if (ds->collectors != NULL) {
poll_timer_wait_until(ds->next_tick * 1000LL);
}
ovs_mutex_unlock(&mutex);
}