2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-30 05:47:55 +00:00
ovs/ofproto/ofproto-dpif-sflow.c

717 lines
22 KiB
C
Raw Normal View History

/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
* Copyright (c) 2009 InMon Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "ofproto-dpif-sflow.h"
#include <inttypes.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdlib.h>
#include "collectors.h"
#include "compiler.h"
#include "dpif.h"
#include "hash.h"
#include "hmap.h"
#include "netdev.h"
#include "netlink.h"
#include "ofpbuf.h"
#include "ofproto.h"
#include "packets.h"
#include "poll-loop.h"
#include "ovs-router.h"
#include "route-table.h"
#include "sflow_api.h"
#include "socket-util.h"
#include "timeval.h"
#include "vlog.h"
#include "lib/odp-util.h"
#include "ofproto-provider.h"
#include "lacp.h"
VLOG_DEFINE_THIS_MODULE(sflow);

/* File-wide lock.  Functions in this file annotated OVS_REQUIRES(mutex)
 * expect the caller to hold it; functions annotated OVS_EXCLUDED(mutex) take
 * it themselves.  It is initialized as a recursive mutex the first time
 * dpif_sflow_create() runs. */
static struct ovs_mutex mutex;
/* One datapath port known to a struct dpif_sflow, used to poll interface
 * counters and to map a datapath port number to an sFlow ifIndex. */
struct dpif_sflow_port {
    struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
    SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
    struct ofport *ofport;      /* To retrieve port stats. */
    odp_port_t odp_port;        /* Datapath port number; "ports" is hashed
                                 * on hash_odp_port() of this value. */
};
/* State of one sFlow exporter instance, as returned by dpif_sflow_create().
 * Reference counted through 'ref_cnt'. */
struct dpif_sflow {
    struct collectors *collectors;  /* Where datagrams are sent; NULL while
                                     * sFlow is disabled. */
    SFLAgent *sflow_agent;          /* sFlow library agent, or NULL if none
                                     * has been configured. */
    struct ofproto_sflow_options *options; /* Private copy of the options
                                            * last applied, or NULL. */
    time_t next_tick;               /* time_now() value at which the agent
                                     * is ticked next. */
    size_t n_flood, n_all;          /* Not referenced in the visible part of
                                     * this file. */
    struct hmap ports;              /* Contains "struct dpif_sflow_port"s. */
    uint32_t probability;           /* 32-bit sampling fraction; 0 turns
                                     * sampling off. */
    struct ovs_refcount ref_cnt;    /* See dpif_sflow_ref()/unref(). */
};
/* Defined further down; declared here because dpif_sflow_unref() uses it
 * before its definition. */
static void dpif_sflow_del_port__(struct dpif_sflow *,
                                  struct dpif_sflow_port *);

/* Receiver index that pollers and the sampler are bound to.
 * NOTE(review): presumably this is the index the sFlow library gives to the
 * single receiver added in dpif_sflow_set_options() -- confirm against the
 * sFlow library. */
#define RECEIVER_INDEX 1

/* Rate limiter shared by the rate-limited log messages in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
/* Compares two strings, either of which may be a null pointer.
 *
 * Returns true if 'a' and 'b' are both null, or if both are non-null and
 * hold the same characters.  A null pointer never equals a non-null string,
 * not even the empty string. */
static bool
nullable_string_is_equal(const char *a, const char *b)
{
    if (!a || !b) {
        /* At least one side is missing: equal only if both are. */
        return !a && !b;
    }
    return strcmp(a, b) == 0;
}
/* Returns true if 'a' and 'b' hold the same sFlow configuration: the same set
 * of targets, the same numeric settings, and the same (possibly null)
 * 'agent_device' and 'control_ip' strings. */
static bool
ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
                            const struct ofproto_sflow_options *b)
{
    if (!sset_equals(&a->targets, &b->targets)) {
        return false;
    }

    if (a->sampling_rate != b->sampling_rate
        || a->polling_interval != b->polling_interval
        || a->header_len != b->header_len
        || a->sub_id != b->sub_id) {
        return false;
    }

    return nullable_string_is_equal(a->agent_device, b->agent_device)
           && nullable_string_is_equal(a->control_ip, b->control_ip);
}
/* Returns a newly allocated deep copy of 'old'.
 *
 * The scalar members are copied bitwise; the target set and the two optional
 * strings are duplicated so that the copy shares no storage with 'old'.  The
 * caller owns the result and releases it with
 * ofproto_sflow_options_destroy(). */
static struct ofproto_sflow_options *
ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
{
    struct ofproto_sflow_options *copy = xmemdup(old, sizeof *old);

    /* After the bitwise copy the owned members still alias 'old'; replace
     * each of them with an independent duplicate. */
    sset_clone(&copy->targets, &old->targets);

    copy->agent_device = NULL;
    if (old->agent_device) {
        copy->agent_device = xstrdup(old->agent_device);
    }

    copy->control_ip = NULL;
    if (old->control_ip) {
        copy->control_ip = xstrdup(old->control_ip);
    }

    return copy;
}
/* Frees 'options' together with the target set and strings it owns.  Passing
 * a null pointer is allowed and does nothing. */
static void
ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
{
    if (!options) {
        return;
    }

    sset_destroy(&options->targets);
    free(options->agent_device);
    free(options->control_ip);
    free(options);
}
/* sFlow library callback to allocate memory.
 *
 * Returns 'bytes' bytes of zero-filled memory obtained from calloc(), or a
 * null pointer if calloc() fails.  'magic' and 'agent' are ignored. */
static void *
sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
                     size_t bytes)
{
    void *block = calloc(1, bytes);

    return block;
}
/* sFlow library callback to free memory.
 *
 * Releases 'obj' with free() and always returns 0.  'magic' and 'agent' are
 * ignored. */
static int
sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
                    void *obj)
{
    free(obj);
    return 0;
}
/* sFlow library callback to report error.
 *
 * Logs 'msg' at warning level.  'magic' and 'agent' are ignored. */
static void
sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
                     char *msg)
{
    VLOG_WARN("sFlow agent error: %s", msg);
}
/* sFlow library callback to send datagram.
 *
 * 'ds_' is the struct dpif_sflow that was registered as the callback pointer
 * in sfl_agent_init().  The 'pktLen' bytes at 'pkt' are handed to that
 * instance's collectors.  'agent' and 'receiver' are ignored. */
static void
sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
                           SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
                           uint32_t pktLen)
{
    struct dpif_sflow *sflow = ds_;
    struct collectors *destinations = sflow->collectors;

    collectors_send(destinations, pkt, pktLen);
}
/* Looks up the port record for datapath port 'odp_port' in 'ds'.
 *
 * Returns the matching struct dpif_sflow_port, or a null pointer if 'ds' has
 * no record for that port. */
static struct dpif_sflow_port *
dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
    OVS_REQUIRES(mutex)
{
    struct dpif_sflow_port *match = NULL;
    struct dpif_sflow_port *candidate;

    /* Several ports may share a bucket, so compare the port number too. */
    HMAP_FOR_EACH_IN_BUCKET (candidate, hmap_node, hash_odp_port(odp_port),
                             &ds->ports) {
        if (candidate->odp_port == odp_port) {
            match = candidate;
            break;
        }
    }
    return match;
}
/* sFlow library poller callback: fills counter sample 'cs' for the port that
 * 'poller' is bound to and writes it out through 'poller'.
 *
 * 'ds_' is the struct dpif_sflow registered with the poller.  If the poller's
 * bridge port is not (or no longer) known to it, nothing is written.
 *
 * The sample always carries the generic interface counters, the port name
 * (when the netdev has one) and the OpenFlow datapath ID and port number.
 * LACP counters are added only when ofproto_port_get_lacp_stats() succeeds.
 *
 * Counters this code cannot supply are set to all-ones (-1), as are all
 * statistics if ofproto_port_get_stats() fails. */
static void
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
                         SFL_COUNTERS_SAMPLE_TYPE *cs)
    OVS_REQUIRES(mutex)
{
    struct dpif_sflow *ds = ds_;
    SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
    enum netdev_features current;
    struct dpif_sflow_port *dsp;
    SFLIf_counters *counters;
    struct netdev_stats stats;
    enum netdev_flags flags;
    struct lacp_slave_stats lacp_stats;
    const char *ifName;

    dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
    if (!dsp) {
        return;
    }

    elem.tag = SFLCOUNTERS_GENERIC;
    counters = &elem.counterBlock.generic;
    counters->ifIndex = SFL_DS_INDEX(poller->dsi);
    counters->ifType = 6;
    if (!netdev_get_features(dsp->ofport->netdev, &current, NULL, NULL, NULL)) {
        /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
           1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
        counters->ifSpeed = netdev_features_to_bps(current, 0);
        counters->ifDirection = (netdev_features_is_full_duplex(current)
                                 ? 1 : 2);
    } else {
        counters->ifSpeed = 100000000;
        counters->ifDirection = 0;
    }
    if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
        counters->ifStatus = 1; /* ifAdminStatus up. */
        if (netdev_get_carrier(dsp->ofport->netdev)) {
            counters->ifStatus |= 2; /* ifOperStatus up. */
        }
    } else {
        counters->ifStatus = 0;  /* Down. */
    }

    /* XXX
       1. Is the multicast counter filled in?
       2. Does the multicast counter include broadcasts?
       3. Does the rx_packets counter include multicasts/broadcasts?
    */
    if (ofproto_port_get_stats(dsp->ofport, &stats)) {
        /* Do not export whatever happens to be in 'stats' after a failed
         * query: mark every statistic unknown (all-ones), the same value
         * used below for counters that are never available. */
        memset(&stats, 0xff, sizeof stats);
    }
    counters->ifInOctets = stats.rx_bytes;
    counters->ifInUcastPkts = stats.rx_packets;
    counters->ifInMulticastPkts = stats.multicast;
    counters->ifInBroadcastPkts = -1;
    counters->ifInDiscards = stats.rx_dropped;
    counters->ifInErrors = stats.rx_errors;
    counters->ifInUnknownProtos = -1;
    counters->ifOutOctets = stats.tx_bytes;
    counters->ifOutUcastPkts = stats.tx_packets;
    counters->ifOutMulticastPkts = -1;
    counters->ifOutBroadcastPkts = -1;
    counters->ifOutDiscards = stats.tx_dropped;
    counters->ifOutErrors = stats.tx_errors;
    counters->ifPromiscuousMode = 0;

    SFLADD_ELEMENT(cs, &elem);

    /* Include LACP counters and identifiers if this port is part of a LAG. */
    if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
        memset(&lacp_elem, 0, sizeof lacp_elem);
        lacp_elem.tag = SFLCOUNTERS_LACP;
        memcpy(&lacp_elem.counterBlock.lacp.actorSystemID,
               lacp_stats.dot3adAggPortActorSystemID,
               ETH_ADDR_LEN);
        memcpy(&lacp_elem.counterBlock.lacp.partnerSystemID,
               lacp_stats.dot3adAggPortPartnerOperSystemID,
               ETH_ADDR_LEN);
        lacp_elem.counterBlock.lacp.attachedAggID =
            lacp_stats.dot3adAggPortAttachedAggID;
        lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
            lacp_stats.dot3adAggPortActorAdminState;
        lacp_elem.counterBlock.lacp.portState.v.actorOper =
            lacp_stats.dot3adAggPortActorOperState;
        lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
            lacp_stats.dot3adAggPortPartnerAdminState;
        lacp_elem.counterBlock.lacp.portState.v.partnerOper =
            lacp_stats.dot3adAggPortPartnerOperState;
        lacp_elem.counterBlock.lacp.LACPDUsRx =
            lacp_stats.dot3adAggPortStatsLACPDUsRx;
        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
        lacp_elem.counterBlock.lacp.illegalRx =
            lacp_stats.dot3adAggPortStatsIllegalRx;
        lacp_elem.counterBlock.lacp.LACPDUsTx =
            lacp_stats.dot3adAggPortStatsLACPDUsTx;
        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
        SFLADD_ELEMENT(cs, &lacp_elem);
    }

    /* Include Port name. */
    if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
        memset(&name_elem, 0, sizeof name_elem);
        name_elem.tag = SFLCOUNTERS_PORTNAME;
        /* The element only borrows the name; the cast drops 'const' because
         * the 'str' member is a plain char pointer. */
        name_elem.counterBlock.portName.portName.str = (char *)ifName;
        name_elem.counterBlock.portName.portName.len = strlen(ifName);
        SFLADD_ELEMENT(cs, &name_elem);
    }

    /* Include OpenFlow DPID and openflow port number. */
    memset(&of_elem, 0, sizeof of_elem);
    of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
    of_elem.counterBlock.ofPort.datapath_id =
        ofproto_get_datapath_id(dsp->ofport->ofproto);
    of_elem.counterBlock.ofPort.port_no =
        (OVS_FORCE uint32_t)dsp->ofport->ofp_port;
    SFLADD_ELEMENT(cs, &of_elem);

    /* All elements above live on this stack frame, so the sample has to be
     * written before returning. */
    sfl_poller_writeCountersSample(poller, cs);
}
/* Picks the IPv4 address that the local sFlow agent reports as its own and
 * stores it into '*agent_addr'.  Returns true if successful, false on failure
 * (after logging an error).
 *
 * The agent address should be a local IP address that is persistent and
 * reachable over the network, if possible.  Candidates are tried in this
 * order, and the first one that yields an address wins:
 *
 *   1. The address of 'agent_device', if one is given.
 *   2. For each entry in 'targets' that parses as an IPv4 destination, the
 *      address of the device that the routing lookup selects for it.
 *   3. 'control_ip', the IP address used to talk to the controller, if one
 *      is given.
 *
 * '*agent_addr' is zeroed and marked as IPv4 even when this fails. */
static bool
sflow_choose_agent_address(const char *agent_device,
                           const struct sset *targets,
                           const char *control_ip,
                           SFLAddress *agent_addr)
{
    struct in_addr in4;
    const char *target;
    bool found;

    memset(agent_addr, 0, sizeof *agent_addr);
    agent_addr->type = SFLADDRESSTYPE_IP_V4;

    /* 1. Explicitly configured agent device. */
    found = agent_device && !netdev_get_in4_by_name(agent_device, &in4);

    /* 2. Device that routes toward one of the collectors. */
    if (!found) {
        SSET_FOR_EACH (target, targets) {
            union {
                struct sockaddr_storage ss;
                struct sockaddr_in sin;
            } sa;
            char name[IFNAMSIZ];
            ovs_be32 gw;

            if (!inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss)
                || sa.ss.ss_family != AF_INET) {
                continue;
            }
            if (ovs_router_lookup(sa.sin.sin_addr.s_addr, name, &gw)
                && !netdev_get_in4_by_name(name, &in4)) {
                found = true;
                break;
            }
        }
    }

    /* 3. Address used to talk to the controller. */
    if (!found && control_ip && !lookup_ip(control_ip, &in4)) {
        found = true;
    }

    if (!found) {
        VLOG_ERR("could not determine IP address for sFlow agent");
        return false;
    }

    agent_addr->address.ip_v4.addr = (OVS_FORCE uint32_t) in4.s_addr;
    return true;
}
/* Returns 'ds' to the disabled state: releases and frees the sFlow agent (if
 * any), destroys the collectors and the stored options, and sets the sampling
 * probability to 0.  The table of known ports is left untouched. */
static void
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
{
    SFLAgent *agent = ds->sflow_agent;

    if (agent) {
        sfl_agent_release(agent);
        free(agent);
        ds->sflow_agent = NULL;
    }

    collectors_destroy(ds->collectors);
    ds->collectors = NULL;

    ofproto_sflow_options_destroy(ds->options);
    ds->options = NULL;

    /* Turn off sampling to save CPU cycles. */
    ds->probability = 0;
}
/* Disables sFlow on 'ds': takes 'mutex' and runs dpif_sflow_clear__(), which
 * drops the agent, collectors and stored options and turns sampling off. */
void
dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    ovs_mutex_lock(&mutex);
    dpif_sflow_clear__(ds);
    ovs_mutex_unlock(&mutex);
}
/* Returns true if 'ds' currently has collectors configured, which is how this
 * file represents "sFlow is enabled"; returns false otherwise. */
bool
dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    bool has_collectors;

    ovs_mutex_lock(&mutex);
    has_collectors = ds->collectors ? true : false;
    ovs_mutex_unlock(&mutex);

    return has_collectors;
}
/* Creates and returns a new, disabled sFlow instance holding one reference.
 *
 * The instance starts with no agent, collectors or options, an empty port
 * table and a sampling probability of 0.  The first call also initializes the
 * file-wide mutex, exactly once. */
struct dpif_sflow *
dpif_sflow_create(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    struct dpif_sflow *sflow;

    if (ovsthread_once_start(&once)) {
        ovs_mutex_init_recursive(&mutex);
        ovsthread_once_done(&once);
    }

    sflow = xcalloc(1, sizeof *sflow);
    ovs_refcount_init(&sflow->ref_cnt);
    hmap_init(&sflow->ports);
    sflow->probability = 0;
    sflow->next_tick = time_now() + 1;

    return sflow;
}
/* Takes an additional reference on 'ds_' and returns it as a non-const
 * pointer.  A null 'ds_' is allowed and yields a null pointer. */
struct dpif_sflow *
dpif_sflow_ref(const struct dpif_sflow *ds_)
{
    struct dpif_sflow *ds;

    if (!ds_) {
        return NULL;
    }

    ds = CONST_CAST(struct dpif_sflow *, ds_);
    ovs_refcount_ref(&ds->ref_cnt);
    return ds;
}
/* Returns the sampling probability of 'ds' as a 32-bit fraction of packets:
 * 0 samples no packets, %UINT32_MAX samples all packets, and values in
 * between sample the corresponding fraction. */
uint32_t
dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    uint32_t fraction;

    ovs_mutex_lock(&mutex);
    fraction = ds->probability;
    ovs_mutex_unlock(&mutex);

    return fraction;
}
/* Drops one reference to 'ds'.  When the last reference goes away, sFlow is
 * disabled on 'ds', every port record is deleted, and 'ds' itself is freed.
 * A null 'ds' is allowed and does nothing. */
void
dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    struct dpif_sflow_port *port, *next_port;

    if (!ds || ovs_refcount_unref_relaxed(&ds->ref_cnt) != 1) {
        return;
    }

    /* That was the last reference: tear everything down. */
    dpif_sflow_clear(ds);
    HMAP_FOR_EACH_SAFE (port, next_port, hmap_node, &ds->ports) {
        dpif_sflow_del_port__(ds, port);
    }
    hmap_destroy(&ds->ports);
    free(ds);
}
/* Registers a counter poller for 'dsp' with the sFlow agent of 'ds'.
 *
 * The poller calls sflow_agent_get_counters() with 'ds' as its callback
 * pointer, uses the polling interval from the current options, reports to
 * RECEIVER_INDEX, and records the datapath port number of 'dsp' as its bridge
 * port.  The caller must make sure 'ds' has both an agent and options. */
static void
dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
    OVS_REQUIRES(mutex)
{
    SFLPoller *poller;

    poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
                                 sflow_agent_get_counters);

    sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
    sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
    sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
}
/* Starts tracking 'ofport', whose datapath port number is 'odp_port', in
 * 'ds'.
 *
 * Any existing record for 'odp_port' is removed first.  Ports whose netdev
 * reports no positive ifindex are not recorded at all.  If 'ds' already has
 * an sFlow agent, a counter poller is registered for the new port
 * immediately; otherwise one is added when the agent is created. */
void
dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
                    odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
    struct dpif_sflow_port *dsp;
    int ifindex;

    ovs_mutex_lock(&mutex);

    /* Drop any stale record for this port number.  This uses the helpers that
     * expect 'mutex' to be held instead of calling dpif_sflow_del_port(),
     * which is annotated OVS_EXCLUDED(mutex) and would take the lock a second
     * time. */
    dsp = dpif_sflow_find_port(ds, odp_port);
    if (dsp) {
        dpif_sflow_del_port__(ds, dsp);
    }

    ifindex = netdev_get_ifindex(ofport->netdev);
    if (ifindex <= 0) {
        /* Not an ifindex port, so do not add a cross-reference to it here */
        goto out;
    }

    /* Add to table of ports. */
    dsp = xmalloc(sizeof *dsp);
    dsp->ofport = ofport;
    dsp->odp_port = odp_port;
    SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
    hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));

    /* Add poller. */
    if (ds->sflow_agent) {
        dpif_sflow_add_poller(ds, dsp);
    }

out:
    ovs_mutex_unlock(&mutex);
}
/* Removes 'dsp' from 'ds' and frees it.  If 'ds' has an sFlow agent, the
 * poller and the sampler registered for the data source of 'dsp' are removed
 * from the agent first. */
static void
dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
    OVS_REQUIRES(mutex)
{
    SFLAgent *agent = ds->sflow_agent;

    if (agent) {
        sfl_agent_removePoller(agent, &dsp->dsi);
        sfl_agent_removeSampler(agent, &dsp->dsi);
    }

    hmap_remove(&ds->ports, &dsp->hmap_node);
    free(dsp);
}
/* Stops tracking datapath port 'odp_port' in 'ds'.  Does nothing if 'ds' has
 * no record for that port. */
void
dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
    OVS_EXCLUDED(mutex)
{
    struct dpif_sflow_port *victim;

    ovs_mutex_lock(&mutex);
    victim = dpif_sflow_find_port(ds, odp_port);
    if (victim != NULL) {
        dpif_sflow_del_port__(ds, victim);
    }
    ovs_mutex_unlock(&mutex);
}
/* Applies configuration 'options' to 'ds'.
 *
 * sFlow is disabled on 'ds' (see dpif_sflow_clear__()) when 'options' has no
 * targets or a zero sampling rate, when none of the collectors can be opened,
 * or when no agent IP address can be determined.
 *
 * Otherwise the collectors are (re)opened if the options changed or if fewer
 * collectors are open than targets are configured.  If the options did change,
 * 'ds' takes a private copy of them and builds a fresh sFlow agent with one
 * receiver, one sampler for the bridge, and one poller per known port.  If the
 * options did not change, the existing agent is left alone. */
void
dpif_sflow_set_options(struct dpif_sflow *ds,
                       const struct ofproto_sflow_options *options)
    OVS_EXCLUDED(mutex)
{
    struct dpif_sflow_port *dsp;
    bool options_changed;
    SFLReceiver *receiver;
    SFLAddress agentIP;
    time_t now;
    SFLDataSource_instance dsi;
    uint32_t dsIndex;
    SFLSampler *sampler;

    ovs_mutex_lock(&mutex);
    if (sset_is_empty(&options->targets) || !options->sampling_rate) {
        /* No point in doing any work if there are no targets or nothing to
         * sample. */
        dpif_sflow_clear__(ds);
        goto out;
    }

    options_changed = (!ds->options
                       || !ofproto_sflow_options_equal(options, ds->options));

    /* Configure collectors if options have changed or if we're shortchanged in
     * collectors (which indicates that opening one or more of the configured
     * collectors failed, so that we should retry). */
    if (options_changed
        || collectors_count(ds->collectors) < sset_count(&options->targets)) {
        collectors_destroy(ds->collectors);
        collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
                          &ds->collectors);
        if (ds->collectors == NULL) {
            VLOG_WARN_RL(&rl, "no collectors could be initialized, "
                         "sFlow disabled");
            dpif_sflow_clear__(ds);
            goto out;
        }
    }

    /* Choose agent IP address and agent device (if not yet setup) */
    if (!sflow_choose_agent_address(options->agent_device,
                                    &options->targets,
                                    options->control_ip, &agentIP)) {
        dpif_sflow_clear__(ds);
        goto out;
    }

    /* Avoid reconfiguring if options didn't change. */
    if (!options_changed) {
        goto out;
    }
    ofproto_sflow_options_destroy(ds->options);
    ds->options = ofproto_sflow_options_clone(options);

    /* Create agent. */
    VLOG_INFO("creating sFlow agent %d", options->sub_id);
    if (ds->sflow_agent) {
        /* Release what the old agent owns and then free the agent structure
         * itself, as dpif_sflow_clear__() does.  Without the free() the old
         * structure would leak when the pointer is overwritten below. */
        sfl_agent_release(ds->sflow_agent);
        free(ds->sflow_agent);
    }
    ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
    now = time_wall();
    sfl_agent_init(ds->sflow_agent,
                   &agentIP,
                   options->sub_id,
                   now,         /* Boot time. */
                   now,         /* Current time. */
                   ds,          /* Pointer supplied to callbacks. */
                   sflow_agent_alloc_cb,
                   sflow_agent_free_cb,
                   sflow_agent_error_cb,
                   sflow_agent_send_packet_cb);

    receiver = sfl_agent_addReceiver(ds->sflow_agent);
    sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
    sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);

    /* Set the sampling_rate down in the datapath.  The rate is known to be
     * nonzero here because a zero rate returned early above. */
    ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);

    /* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
       because it is associated with the hypervisor, and interacts with the server
       hardware directly.  The sub_id is used to distinguish this sampler from
       others on other bridges within the same agent. */
    dsIndex = 1000 + options->sub_id;
    SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
    sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
    sfl_sampler_set_sFlowFsPacketSamplingRate(sampler,
                                              ds->options->sampling_rate);
    sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
    sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);

    /* Add pollers for the currently known ifindex-ports */
    HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
        dpif_sflow_add_poller(ds, dsp);
    }

out:
    ovs_mutex_unlock(&mutex);
}
/* Returns the sFlow data source index (the ifindex recorded when the port was
 * added) for datapath port 'odp_port' in 'ds', or 0 if 'ds' has no record for
 * that port. */
int
dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
                               odp_port_t odp_port) OVS_EXCLUDED(mutex)
{
    struct dpif_sflow_port *port;
    int ifindex = 0;

    ovs_mutex_lock(&mutex);
    port = dpif_sflow_find_port(ds, odp_port);
    if (port) {
        ifindex = SFL_DS_INDEX(port->dsi);
    }
    ovs_mutex_unlock(&mutex);

    return ifindex;
}
/* Turns one sampled packet into an sFlow flow sample and hands it to the
 * sampler of 'ds' for encoding into the next datagram.
 *
 * 'packet' is the sampled packet; at most the sampler's maximum header size
 * of it is exported.  'flow' supplies the source VLAN and priority,
 * 'odp_in_port' the input port (reported as ifIndex 0 when the port has no
 * record in 'ds'), and 'cookie' the destination VLAN, priority and output
 * value.
 *
 * Nothing is done if 'ds' has no sFlow agent or the agent has no sampler. */
void
dpif_sflow_received(struct dpif_sflow *ds, const struct ofpbuf *packet,
                    const struct flow *flow, odp_port_t odp_in_port,
                    const union user_action_cookie *cookie)
    OVS_EXCLUDED(mutex)
{
    SFL_FLOW_SAMPLE_TYPE fs;
    SFLFlow_sample_element hdrElem;
    SFLSampled_header *header;
    SFLFlow_sample_element switchElem;
    SFLSampler *sampler;
    struct dpif_sflow_port *in_dsp;
    ovs_be16 vlan_tci;

    ovs_mutex_lock(&mutex);

    /* dpif_sflow_clear__() leaves 'sflow_agent' null, so check it before
     * looking at its sampler list. */
    if (!ds->sflow_agent) {
        goto out;
    }
    sampler = ds->sflow_agent->samplers;
    if (!sampler) {
        goto out;
    }

    /* Build a flow sample. */
    memset(&fs, 0, sizeof fs);

    /* Look up the input ifIndex if this port has one. Otherwise just
     * leave it as 0 (meaning 'unknown') and continue. */
    in_dsp = dpif_sflow_find_port(ds, odp_in_port);
    if (in_dsp) {
        fs.input = SFL_DS_INDEX(in_dsp->dsi);
    }

    /* Make the assumption that the random number generator in the datapath converges
     * to the configured mean, and just increment the samplePool by the configured
     * sampling rate every time. */
    sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);

    /* Sampled header. */
    memset(&hdrElem, 0, sizeof hdrElem);
    hdrElem.tag = SFLFLOW_HEADER;
    header = &hdrElem.flowType.header;
    header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
    /* The frame_length should include the Ethernet FCS (4 bytes),
     * but it has already been stripped,  so we need to add 4 here. */
    header->frame_length = ofpbuf_size(packet) + 4;
    /* Ethernet FCS stripped off. */
    header->stripped = 4;
    header->header_length = MIN(ofpbuf_size(packet),
                                sampler->sFlowFsMaximumHeaderSize);
    header->header_bytes = ofpbuf_data(packet);

    /* Add extended switch element. */
    memset(&switchElem, 0, sizeof(switchElem));
    switchElem.tag = SFLFLOW_EX_SWITCH;
    switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlan_tci);
    switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlan_tci);

    /* Retrieve data from user_action_cookie. */
    vlan_tci = cookie->sflow.vlan_tci;
    switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
    switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);

    fs.output = cookie->sflow.output;

    /* Submit the flow sample to be encoded into the next datagram. */
    SFLADD_ELEMENT(&fs, &hdrElem);
    SFLADD_ELEMENT(&fs, &switchElem);
    sfl_sampler_writeFlowSample(sampler, &fs);

out:
    ovs_mutex_unlock(&mutex);
}
/* Performs periodic sFlow work for 'ds'.
 *
 * Does nothing while 'ds' has no collectors.  Otherwise it runs the route
 * table and, once the current time has reached 'next_tick', ticks the sFlow
 * agent with the wall-clock time and schedules the next tick one second
 * later. */
void
dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    time_t now;

    ovs_mutex_lock(&mutex);
    if (ds->collectors == NULL) {
        ovs_mutex_unlock(&mutex);
        return;
    }

    now = time_now();
    route_table_run();
    if (ds->next_tick <= now) {
        sfl_agent_tick(ds->sflow_agent, time_wall());
        ds->next_tick = now + 1;
    }
    ovs_mutex_unlock(&mutex);
}
/* Arranges for the poll loop to wake up when the next agent tick of 'ds' is
 * due.  Registers nothing while 'ds' has no collectors. */
void
dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
{
    ovs_mutex_lock(&mutex);
    if (ds->collectors) {
        /* 'next_tick' is multiplied by 1000 to form the timer argument. */
        long long int wake_at = ds->next_tick * 1000LL;

        poll_timer_wait_until(wake_at);
    }
    ovs_mutex_unlock(&mutex);
}