mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
sflow: Export OVS datapath performance counters via sFlow.
The OVS cache hit/miss counters and memory/CPU usage statistics have been identified as important metrics when managing large deployments. This patch allows them to be pushed periodically as part of the sFlow feed, and represents a more efficient and scalable alternative to polling via ovs-dpctl(1). Signed-off-by: Neil McKee <neil.mckee@inmon.com> Signed-off-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
parent
849222ddd5
commit
3d2912f20e
2
NEWS
2
NEWS
@ -12,7 +12,7 @@ Post-v2.3.0
|
|||||||
http://tools.ietf.org/html/draft-gross-geneve-00
|
http://tools.ietf.org/html/draft-gross-geneve-00
|
||||||
- The OVS database now reports controller rate limiting statistics.
|
- The OVS database now reports controller rate limiting statistics.
|
||||||
- sflow now exports information about LACP-based bonds, port names, and
|
- sflow now exports information about LACP-based bonds, port names, and
|
||||||
OpenFlow port numbers.
|
OpenFlow port numbers, as well as datapath performance counters.
|
||||||
- ovs-dpctl functionality is now available for datapaths integrated
|
- ovs-dpctl functionality is now available for datapaths integrated
|
||||||
into ovs-vswitchd, via ovs-appctl. Some existing ovs-appctl
|
into ovs-vswitchd, via ovs-appctl. Some existing ovs-appctl
|
||||||
commands are now redundant and will be removed in a future
|
commands are now redundant and will be removed in a future
|
||||||
|
34
lib/sflow.h
34
lib/sflow.h
@ -543,6 +543,34 @@ typedef struct _SFLLACP_counters {
|
|||||||
|
|
||||||
#define SFL_CTR_LACP_XDR_SIZE 56
|
#define SFL_CTR_LACP_XDR_SIZE 56
|
||||||
|
|
||||||
|
/* Application resource counters: CPU time, memory, file-descriptor and
 * connection gauges, encoded under the SFLCOUNTERS_APP_RESOURCES tag. */
|
||||||
|
|
||||||
|
typedef struct _SFLAPPResources_counters {
|
||||||
|
uint32_t user_time; /* in milliseconds */
|
||||||
|
uint32_t system_time; /* in milliseconds */
|
||||||
|
uint64_t mem_used; /* presumably bytes: the OVS exporter stores ru_maxrss * 1024 here -- TODO confirm */
|
||||||
|
uint64_t mem_max; /* marked undefined (SFL_UNDEF_GAUGE) by the OVS exporter */
|
||||||
|
uint32_t fd_open; /* marked undefined (SFL_UNDEF_GAUGE) by the OVS exporter */
|
||||||
|
uint32_t fd_max; /* marked undefined (SFL_UNDEF_GAUGE) by the OVS exporter */
|
||||||
|
uint32_t conn_open; /* marked undefined (SFL_UNDEF_GAUGE) by the OVS exporter */
|
||||||
|
uint32_t conn_max; /* marked undefined (SFL_UNDEF_GAUGE) by the OVS exporter */
|
||||||
|
} SFLAPPResources_counters;
|
||||||
|
|
||||||
|
#define SFL_CTR_APP_RESOURCES_XDR_SIZE 40 /* encoded size in bytes: 2*4 + 2*8 + 4*4 */
|
||||||
|
|
||||||
|
/* OVS datapath stats, encoded under the SFLCOUNTERS_OVSDP tag.  The OVS
 * exporter fills each field from the like-named member of its summed
 * struct dpif_dp_stats totals. */
|
||||||
|
|
||||||
|
typedef struct _SFLOVSDP_counters {
|
||||||
|
uint32_t n_hit; /* from dp_totals.n_hit */
|
||||||
|
uint32_t n_missed; /* from dp_totals.n_missed */
|
||||||
|
uint32_t n_lost; /* from dp_totals.n_lost */
|
||||||
|
uint32_t n_mask_hit; /* from dp_totals.n_mask_hit */
|
||||||
|
uint32_t n_flows; /* from dp_totals.n_flows */
|
||||||
|
uint32_t n_masks; /* from dp_totals.n_masks */
|
||||||
|
} SFLOVSDP_counters;
|
||||||
|
|
||||||
|
#define SFL_CTR_OVSDP_XDR_SIZE 24 /* encoded size in bytes: 6 * 4 */
|
||||||
|
|
||||||
/* Counters data */
|
/* Counters data */
|
||||||
|
|
||||||
enum SFLCounters_type_tag {
|
enum SFLCounters_type_tag {
|
||||||
@ -554,7 +582,9 @@ enum SFLCounters_type_tag {
|
|||||||
SFLCOUNTERS_VLAN = 5,
|
SFLCOUNTERS_VLAN = 5,
|
||||||
SFLCOUNTERS_LACP = 7,
|
SFLCOUNTERS_LACP = 7,
|
||||||
SFLCOUNTERS_OPENFLOWPORT = 1004,
|
SFLCOUNTERS_OPENFLOWPORT = 1004,
|
||||||
SFLCOUNTERS_PORTNAME = 1005
|
SFLCOUNTERS_PORTNAME = 1005,
|
||||||
|
SFLCOUNTERS_APP_RESOURCES = 2203,
|
||||||
|
SFLCOUNTERS_OVSDP = 2207
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef union _SFLCounters_type {
|
typedef union _SFLCounters_type {
|
||||||
@ -566,6 +596,8 @@ typedef union _SFLCounters_type {
|
|||||||
SFLLACP_counters lacp;
|
SFLLACP_counters lacp;
|
||||||
SFLOpenFlowPort ofPort;
|
SFLOpenFlowPort ofPort;
|
||||||
SFLPortName portName;
|
SFLPortName portName;
|
||||||
|
SFLAPPResources_counters appResources;
|
||||||
|
SFLOVSDP_counters ovsdp;
|
||||||
} SFLCounters_type;
|
} SFLCounters_type;
|
||||||
|
|
||||||
typedef struct _SFLCounters_sample_element {
|
typedef struct _SFLCounters_sample_element {
|
||||||
|
@ -652,6 +652,8 @@ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
|
|||||||
case SFLCOUNTERS_LACP: elemSiz = SFL_CTR_LACP_XDR_SIZE; break;
|
case SFLCOUNTERS_LACP: elemSiz = SFL_CTR_LACP_XDR_SIZE; break;
|
||||||
case SFLCOUNTERS_OPENFLOWPORT: elemSiz = SFL_CTR_OPENFLOWPORT_XDR_SIZE; break;
|
case SFLCOUNTERS_OPENFLOWPORT: elemSiz = SFL_CTR_OPENFLOWPORT_XDR_SIZE; break;
|
||||||
case SFLCOUNTERS_PORTNAME: elemSiz = stringEncodingLength(&elem->counterBlock.portName.portName); break;
|
case SFLCOUNTERS_PORTNAME: elemSiz = stringEncodingLength(&elem->counterBlock.portName.portName); break;
|
||||||
|
case SFLCOUNTERS_APP_RESOURCES: elemSiz = SFL_CTR_APP_RESOURCES_XDR_SIZE; break;
|
||||||
|
case SFLCOUNTERS_OVSDP: elemSiz = SFL_CTR_OVSDP_XDR_SIZE; break;
|
||||||
default:
|
default:
|
||||||
sflError(receiver, "unexpected counters_tag");
|
sflError(receiver, "unexpected counters_tag");
|
||||||
return -1;
|
return -1;
|
||||||
@ -774,6 +776,24 @@ int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
|
|||||||
case SFLCOUNTERS_PORTNAME:
|
case SFLCOUNTERS_PORTNAME:
|
||||||
putString(receiver, &elem->counterBlock.portName.portName);
|
putString(receiver, &elem->counterBlock.portName.portName);
|
||||||
break;
|
break;
|
||||||
|
case SFLCOUNTERS_APP_RESOURCES:
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.user_time);
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.system_time);
|
||||||
|
putNet64(receiver, elem->counterBlock.appResources.mem_used);
|
||||||
|
putNet64(receiver, elem->counterBlock.appResources.mem_max);
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.fd_open);
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.fd_max);
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.conn_open);
|
||||||
|
putNet32(receiver, elem->counterBlock.appResources.conn_max);
|
||||||
|
break;
|
||||||
|
case SFLCOUNTERS_OVSDP:
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_hit);
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_missed);
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_lost);
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_mask_hit);
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_flows);
|
||||||
|
putNet32(receiver, elem->counterBlock.ovsdp.n_masks);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
sflError(receiver, "unexpected counters_tag");
|
sflError(receiver, "unexpected counters_tag");
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
|
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
|
||||||
* Copyright (c) 2009 InMon Corp.
|
* Copyright (c) 2009 InMon Corp.
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -18,6 +18,7 @@
|
|||||||
#include <config.h>
|
#include <config.h>
|
||||||
#include "ofproto-dpif-sflow.h"
|
#include "ofproto-dpif-sflow.h"
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <net/if.h>
|
#include <net/if.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -46,6 +47,11 @@ VLOG_DEFINE_THIS_MODULE(sflow);
|
|||||||
|
|
||||||
static struct ovs_mutex mutex;
|
static struct ovs_mutex mutex;
|
||||||
|
|
||||||
|
/* Sub-agent ID of the sFlow sub-agent that currently sends the global
 * (datapath) counters.  Holds SFLOW_GC_SUBID_UNCLAIMED while no sub-agent
 * has claimed that role.
 *
 * The macro expansion is fully parenthesized so that it behaves as a single
 * operand in every context (e.g. 'sizeof SFLOW_GC_SUBID_UNCLAIMED'). */
#define SFLOW_GC_SUBID_UNCLAIMED ((uint32_t) -1)
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
|
||||||
|
|
||||||
struct dpif_sflow_port {
|
struct dpif_sflow_port {
|
||||||
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
|
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
|
||||||
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
|
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
|
||||||
@ -161,6 +167,123 @@ dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Call to get the datapath stats. Modeled after the dpctl utility.
|
||||||
|
*
|
||||||
|
* It might be more efficient for this module to be given a handle it can use
|
||||||
|
* to get these stats more efficiently, but this is only going to be called
|
||||||
|
* once every 20-30 seconds. Return number of datapaths found (normally expect
|
||||||
|
* 1). */
|
||||||
|
static int
|
||||||
|
sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
|
||||||
|
struct dpif_dp_stats *dp_totals)
|
||||||
|
{
|
||||||
|
struct sset types;
|
||||||
|
const char *type;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
memset(dp_totals, 0, sizeof *dp_totals);
|
||||||
|
sset_init(&types);
|
||||||
|
dp_enumerate_types(&types);
|
||||||
|
SSET_FOR_EACH (type, &types) {
|
||||||
|
struct sset names;
|
||||||
|
const char *name;
|
||||||
|
sset_init(&names);
|
||||||
|
if (dp_enumerate_names(type, &names) == 0) {
|
||||||
|
SSET_FOR_EACH (name, &names) {
|
||||||
|
struct dpif *dpif;
|
||||||
|
if (dpif_open(name, type, &dpif) == 0) {
|
||||||
|
struct dpif_dp_stats dp_stats;
|
||||||
|
if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
|
||||||
|
count++;
|
||||||
|
dp_totals->n_hit += dp_stats.n_hit;
|
||||||
|
dp_totals->n_missed += dp_stats.n_missed;
|
||||||
|
dp_totals->n_lost += dp_stats.n_lost;
|
||||||
|
dp_totals->n_flows += dp_stats.n_flows;
|
||||||
|
dp_totals->n_mask_hit += dp_stats.n_mask_hit;
|
||||||
|
dp_totals->n_masks += dp_stats.n_masks;
|
||||||
|
}
|
||||||
|
dpif_close(dpif);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sset_destroy(&names);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sset_destroy(&types);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If there are multiple bridges defined then we need some
|
||||||
|
minimal artibration to decide which one should send the
|
||||||
|
global counters. This function allows each sub-agent to
|
||||||
|
ask if he should do it or not. */
|
||||||
|
static bool
|
||||||
|
sflow_global_counters_subid_test(uint32_t subid)
|
||||||
|
OVS_REQUIRES(mutex)
|
||||||
|
{
|
||||||
|
if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
|
||||||
|
/* The role is up for grabs. */
|
||||||
|
sflow_global_counters_subid = subid;
|
||||||
|
}
|
||||||
|
return (sflow_global_counters_subid == subid);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
sflow_global_counters_subid_clear(uint32_t subid)
|
||||||
|
OVS_REQUIRES(mutex)
|
||||||
|
{
|
||||||
|
if (sflow_global_counters_subid == subid) {
|
||||||
|
/* The sub-agent that was sending global counters
|
||||||
|
is going away, so reset to allow another
|
||||||
|
to take over. */
|
||||||
|
sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
|
||||||
|
SFL_COUNTERS_SAMPLE_TYPE *cs)
|
||||||
|
OVS_REQUIRES(mutex)
|
||||||
|
{
|
||||||
|
struct dpif_sflow *ds = ds_;
|
||||||
|
SFLCounters_sample_element dp_elem, res_elem;
|
||||||
|
struct dpif_dp_stats dp_totals;
|
||||||
|
struct rusage usage;
|
||||||
|
|
||||||
|
if (!sflow_global_counters_subid_test(poller->agent->subId)) {
|
||||||
|
/* Another sub-agent is currently responsible for this. */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* datapath stats */
|
||||||
|
if (sflow_get_dp_stats(ds, &dp_totals)) {
|
||||||
|
dp_elem.tag = SFLCOUNTERS_OVSDP;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
|
||||||
|
dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
|
||||||
|
SFLADD_ELEMENT(cs, &dp_elem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* resource usage */
|
||||||
|
getrusage(RUSAGE_SELF, &usage);
|
||||||
|
res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
|
||||||
|
res_elem.counterBlock.appResources.user_time
|
||||||
|
= timeval_to_msec(&usage.ru_utime);
|
||||||
|
res_elem.counterBlock.appResources.system_time
|
||||||
|
= timeval_to_msec(&usage.ru_stime);
|
||||||
|
res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
|
||||||
|
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
|
||||||
|
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
|
||||||
|
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
|
||||||
|
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
|
||||||
|
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
|
||||||
|
|
||||||
|
SFLADD_ELEMENT(cs, &res_elem);
|
||||||
|
sfl_poller_writeCountersSample(poller, cs);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
|
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
|
||||||
SFL_COUNTERS_SAMPLE_TYPE *cs)
|
SFL_COUNTERS_SAMPLE_TYPE *cs)
|
||||||
@ -343,6 +466,7 @@ static void
|
|||||||
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
|
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
|
||||||
{
|
{
|
||||||
if (ds->sflow_agent) {
|
if (ds->sflow_agent) {
|
||||||
|
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
|
||||||
sfl_agent_release(ds->sflow_agent);
|
sfl_agent_release(ds->sflow_agent);
|
||||||
free(ds->sflow_agent);
|
free(ds->sflow_agent);
|
||||||
ds->sflow_agent = NULL;
|
ds->sflow_agent = NULL;
|
||||||
@ -516,6 +640,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
|||||||
SFLDataSource_instance dsi;
|
SFLDataSource_instance dsi;
|
||||||
uint32_t dsIndex;
|
uint32_t dsIndex;
|
||||||
SFLSampler *sampler;
|
SFLSampler *sampler;
|
||||||
|
SFLPoller *poller;
|
||||||
|
|
||||||
ovs_mutex_lock(&mutex);
|
ovs_mutex_lock(&mutex);
|
||||||
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
|
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
|
||||||
@ -562,6 +687,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
|||||||
/* Create agent. */
|
/* Create agent. */
|
||||||
VLOG_INFO("creating sFlow agent %d", options->sub_id);
|
VLOG_INFO("creating sFlow agent %d", options->sub_id);
|
||||||
if (ds->sflow_agent) {
|
if (ds->sflow_agent) {
|
||||||
|
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
|
||||||
sfl_agent_release(ds->sflow_agent);
|
sfl_agent_release(ds->sflow_agent);
|
||||||
}
|
}
|
||||||
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
|
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
|
||||||
@ -595,6 +721,13 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
|||||||
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
|
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
|
||||||
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
|
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
|
||||||
|
|
||||||
|
/* Add a counter poller for the bridge so we can use it to send
|
||||||
|
global counters such as datapath cache hit/miss stats. */
|
||||||
|
poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
|
||||||
|
sflow_agent_get_global_counters);
|
||||||
|
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
|
||||||
|
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
|
||||||
|
|
||||||
/* Add pollers for the currently known ifindex-ports */
|
/* Add pollers for the currently known ifindex-ports */
|
||||||
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
|
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
|
||||||
dpif_sflow_add_poller(ds, dsp);
|
dpif_sflow_add_poller(ds, dsp);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user