mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
sflow: Export OVS datapath performance counters via sFlow.
The OVS cache hit/miss counters and memory/CPU usage statistics have been identified as important metrics when managing large deployments. This patch allows them to be pushed periodically as part of the sFlow feed, and represents a more efficient and scalable alternative to polling via ovs-dpctl(1). Signed-off-by: Neil McKee <neil.mckee@inmon.com> Signed-off-by: Ben Pfaff <blp@nicira.com>
This commit is contained in:
parent
849222ddd5
commit
3d2912f20e
2
NEWS
2
NEWS
@ -12,7 +12,7 @@ Post-v2.3.0
|
||||
http://tools.ietf.org/html/draft-gross-geneve-00
|
||||
- The OVS database now reports controller rate limiting statistics.
|
||||
- sflow now exports information about LACP-based bonds, port names, and
|
||||
OpenFlow port numbers.
|
||||
OpenFlow port numbers, as well as datapath performance counters.
|
||||
- ovs-dpctl functionality is now available for datapaths integrated
|
||||
into ovs-vswitchd, via ovs-appctl. Some existing ovs-appctl
|
||||
commands are now redundant and will be removed in a future
|
||||
|
34
lib/sflow.h
34
lib/sflow.h
@ -543,6 +543,34 @@ typedef struct _SFLLACP_counters {
|
||||
|
||||
#define SFL_CTR_LACP_XDR_SIZE 56
|
||||
|
||||
/* Application resource counters.
 *
 * Counter block carried by a SFLCOUNTERS_APP_RESOURCES element.  The
 * receiver serializes the fields in declaration order, 32-bit fields with
 * putNet32() and 64-bit fields with putNet64(), for a total of
 * SFL_CTR_APP_RESOURCES_XDR_SIZE (40) bytes:
 * 4 + 4 + 8 + 8 + 4 + 4 + 4 + 4. */
|
||||
|
||||
typedef struct _SFLAPPResources_counters {
|
||||
uint32_t user_time; /* in milliseconds */
|
||||
uint32_t system_time; /* in milliseconds */
|
||||
uint64_t mem_used; /* presumably bytes: the OVS exporter stores ru_maxrss * 1024 here -- confirm units */
|
||||
uint64_t mem_max; /* NOTE(review): presumably a memory limit; the OVS exporter marks it SFL_UNDEF_GAUGE */
|
||||
uint32_t fd_open; /* NOTE(review): presumably open file descriptors; marked SFL_UNDEF_GAUGE by the OVS exporter */
|
||||
uint32_t fd_max; /* NOTE(review): presumably the file descriptor limit; marked SFL_UNDEF_GAUGE by the OVS exporter */
|
||||
uint32_t conn_open; /* NOTE(review): presumably open connections; marked SFL_UNDEF_GAUGE by the OVS exporter */
|
||||
uint32_t conn_max; /* NOTE(review): presumably the connection limit; marked SFL_UNDEF_GAUGE by the OVS exporter */
|
||||
} SFLAPPResources_counters;
|
||||
|
||||
#define SFL_CTR_APP_RESOURCES_XDR_SIZE 40
|
||||
|
||||
/* OVS datapath stats.
 *
 * Counter block carried by a SFLCOUNTERS_OVSDP element.  The OVS exporter
 * copies each field from the like-named member of the struct dpif_dp_stats
 * totals it accumulates over the datapaths it can open.  The receiver
 * serializes the six fields in declaration order with putNet32(), for a
 * total of SFL_CTR_OVSDP_XDR_SIZE (24) bytes. */
|
||||
|
||||
typedef struct _SFLOVSDP_counters {
|
||||
uint32_t n_hit; /* NOTE(review): presumably datapath flow-table hits -- confirm against struct dpif_dp_stats */
|
||||
uint32_t n_missed; /* NOTE(review): presumably datapath flow-table misses -- confirm */
|
||||
uint32_t n_lost; /* NOTE(review): presumably packets lost on the way to userspace -- confirm */
|
||||
uint32_t n_mask_hit; /* NOTE(review): presumably flow masks visited during lookups -- confirm */
|
||||
uint32_t n_flows; /* NOTE(review): presumably flows currently in the datapath -- confirm */
|
||||
uint32_t n_masks; /* NOTE(review): presumably flow masks currently in the datapath -- confirm */
|
||||
} SFLOVSDP_counters;
|
||||
|
||||
#define SFL_CTR_OVSDP_XDR_SIZE 24
|
||||
|
||||
/* Counters data */
|
||||
|
||||
enum SFLCounters_type_tag {
|
||||
@ -554,7 +582,9 @@ enum SFLCounters_type_tag {
|
||||
SFLCOUNTERS_VLAN = 5,
|
||||
SFLCOUNTERS_LACP = 7,
|
||||
SFLCOUNTERS_OPENFLOWPORT = 1004,
|
||||
SFLCOUNTERS_PORTNAME = 1005
|
||||
SFLCOUNTERS_PORTNAME = 1005,
|
||||
SFLCOUNTERS_APP_RESOURCES = 2203,
|
||||
SFLCOUNTERS_OVSDP = 2207
|
||||
};
|
||||
|
||||
typedef union _SFLCounters_type {
|
||||
@ -566,6 +596,8 @@ typedef union _SFLCounters_type {
|
||||
SFLLACP_counters lacp;
|
||||
SFLOpenFlowPort ofPort;
|
||||
SFLPortName portName;
|
||||
SFLAPPResources_counters appResources;
|
||||
SFLOVSDP_counters ovsdp;
|
||||
} SFLCounters_type;
|
||||
|
||||
typedef struct _SFLCounters_sample_element {
|
||||
|
@ -652,6 +652,8 @@ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
|
||||
case SFLCOUNTERS_LACP: elemSiz = SFL_CTR_LACP_XDR_SIZE; break;
|
||||
case SFLCOUNTERS_OPENFLOWPORT: elemSiz = SFL_CTR_OPENFLOWPORT_XDR_SIZE; break;
|
||||
case SFLCOUNTERS_PORTNAME: elemSiz = stringEncodingLength(&elem->counterBlock.portName.portName); break;
|
||||
case SFLCOUNTERS_APP_RESOURCES: elemSiz = SFL_CTR_APP_RESOURCES_XDR_SIZE; break;
|
||||
case SFLCOUNTERS_OVSDP: elemSiz = SFL_CTR_OVSDP_XDR_SIZE; break;
|
||||
default:
|
||||
sflError(receiver, "unexpected counters_tag");
|
||||
return -1;
|
||||
@ -774,6 +776,24 @@ int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_
|
||||
case SFLCOUNTERS_PORTNAME:
|
||||
putString(receiver, &elem->counterBlock.portName.portName);
|
||||
break;
|
||||
case SFLCOUNTERS_APP_RESOURCES:
|
||||
putNet32(receiver, elem->counterBlock.appResources.user_time);
|
||||
putNet32(receiver, elem->counterBlock.appResources.system_time);
|
||||
putNet64(receiver, elem->counterBlock.appResources.mem_used);
|
||||
putNet64(receiver, elem->counterBlock.appResources.mem_max);
|
||||
putNet32(receiver, elem->counterBlock.appResources.fd_open);
|
||||
putNet32(receiver, elem->counterBlock.appResources.fd_max);
|
||||
putNet32(receiver, elem->counterBlock.appResources.conn_open);
|
||||
putNet32(receiver, elem->counterBlock.appResources.conn_max);
|
||||
break;
|
||||
case SFLCOUNTERS_OVSDP:
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_hit);
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_missed);
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_lost);
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_mask_hit);
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_flows);
|
||||
putNet32(receiver, elem->counterBlock.ovsdp.n_masks);
|
||||
break;
|
||||
default:
|
||||
sflError(receiver, "unexpected counters_tag");
|
||||
return -1;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
|
||||
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
|
||||
* Copyright (c) 2009 InMon Corp.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -18,6 +18,7 @@
|
||||
#include <config.h>
|
||||
#include "ofproto-dpif-sflow.h"
|
||||
#include <inttypes.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/socket.h>
|
||||
#include <net/if.h>
|
||||
#include <stdlib.h>
|
||||
@ -46,6 +47,11 @@ VLOG_DEFINE_THIS_MODULE(sflow);
|
||||
|
||||
static struct ovs_mutex mutex;
|
||||
|
||||
/* Arbitration state for the global (datapath) counters.
 *
 * 'sflow_global_counters_subid' holds the sub-agent id of the sFlow
 * sub-agent that is currently responsible for sending the datapath
 * counters, or SFLOW_GC_SUBID_UNCLAIMED when no sub-agent holds that role.
 * The functions in this file that read or write it are annotated
 * OVS_REQUIRES(mutex).
 *
 * The macro expansion is fully parenthesized so that it behaves as a single
 * operand in any expression (e.g. as the operand of sizeof). */
#define SFLOW_GC_SUBID_UNCLAIMED ((uint32_t) -1)
static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
|
||||
|
||||
struct dpif_sflow_port {
|
||||
struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
|
||||
SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
|
||||
@ -161,6 +167,123 @@ dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Call to get the datapath stats. Modeled after the dpctl utility.
|
||||
*
|
||||
* It might be more efficient for this module to be given a handle it can use
|
||||
* to get these stats more efficiently, but this is only going to be called
|
||||
* once every 20-30 seconds. Return number of datapaths found (normally expect
|
||||
* 1). */
|
||||
static int
|
||||
sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
|
||||
struct dpif_dp_stats *dp_totals)
|
||||
{
|
||||
struct sset types;
|
||||
const char *type;
|
||||
int count = 0;
|
||||
|
||||
memset(dp_totals, 0, sizeof *dp_totals);
|
||||
sset_init(&types);
|
||||
dp_enumerate_types(&types);
|
||||
SSET_FOR_EACH (type, &types) {
|
||||
struct sset names;
|
||||
const char *name;
|
||||
sset_init(&names);
|
||||
if (dp_enumerate_names(type, &names) == 0) {
|
||||
SSET_FOR_EACH (name, &names) {
|
||||
struct dpif *dpif;
|
||||
if (dpif_open(name, type, &dpif) == 0) {
|
||||
struct dpif_dp_stats dp_stats;
|
||||
if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
|
||||
count++;
|
||||
dp_totals->n_hit += dp_stats.n_hit;
|
||||
dp_totals->n_missed += dp_stats.n_missed;
|
||||
dp_totals->n_lost += dp_stats.n_lost;
|
||||
dp_totals->n_flows += dp_stats.n_flows;
|
||||
dp_totals->n_mask_hit += dp_stats.n_mask_hit;
|
||||
dp_totals->n_masks += dp_stats.n_masks;
|
||||
}
|
||||
dpif_close(dpif);
|
||||
}
|
||||
}
|
||||
sset_destroy(&names);
|
||||
}
|
||||
}
|
||||
sset_destroy(&types);
|
||||
return count;
|
||||
}
|
||||
|
||||
/* If there are multiple bridges defined then we need some
|
||||
minimal artibration to decide which one should send the
|
||||
global counters. This function allows each sub-agent to
|
||||
ask if he should do it or not. */
|
||||
static bool
|
||||
sflow_global_counters_subid_test(uint32_t subid)
|
||||
OVS_REQUIRES(mutex)
|
||||
{
|
||||
if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
|
||||
/* The role is up for grabs. */
|
||||
sflow_global_counters_subid = subid;
|
||||
}
|
||||
return (sflow_global_counters_subid == subid);
|
||||
}
|
||||
|
||||
static void
|
||||
sflow_global_counters_subid_clear(uint32_t subid)
|
||||
OVS_REQUIRES(mutex)
|
||||
{
|
||||
if (sflow_global_counters_subid == subid) {
|
||||
/* The sub-agent that was sending global counters
|
||||
is going away, so reset to allow another
|
||||
to take over. */
|
||||
sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
|
||||
SFL_COUNTERS_SAMPLE_TYPE *cs)
|
||||
OVS_REQUIRES(mutex)
|
||||
{
|
||||
struct dpif_sflow *ds = ds_;
|
||||
SFLCounters_sample_element dp_elem, res_elem;
|
||||
struct dpif_dp_stats dp_totals;
|
||||
struct rusage usage;
|
||||
|
||||
if (!sflow_global_counters_subid_test(poller->agent->subId)) {
|
||||
/* Another sub-agent is currently responsible for this. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* datapath stats */
|
||||
if (sflow_get_dp_stats(ds, &dp_totals)) {
|
||||
dp_elem.tag = SFLCOUNTERS_OVSDP;
|
||||
dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
|
||||
dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
|
||||
dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
|
||||
dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
|
||||
dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
|
||||
dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
|
||||
SFLADD_ELEMENT(cs, &dp_elem);
|
||||
}
|
||||
|
||||
/* resource usage */
|
||||
getrusage(RUSAGE_SELF, &usage);
|
||||
res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
|
||||
res_elem.counterBlock.appResources.user_time
|
||||
= timeval_to_msec(&usage.ru_utime);
|
||||
res_elem.counterBlock.appResources.system_time
|
||||
= timeval_to_msec(&usage.ru_stime);
|
||||
res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
|
||||
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
|
||||
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
|
||||
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
|
||||
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
|
||||
SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
|
||||
|
||||
SFLADD_ELEMENT(cs, &res_elem);
|
||||
sfl_poller_writeCountersSample(poller, cs);
|
||||
}
|
||||
|
||||
static void
|
||||
sflow_agent_get_counters(void *ds_, SFLPoller *poller,
|
||||
SFL_COUNTERS_SAMPLE_TYPE *cs)
|
||||
@ -343,6 +466,7 @@ static void
|
||||
dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
|
||||
{
|
||||
if (ds->sflow_agent) {
|
||||
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
|
||||
sfl_agent_release(ds->sflow_agent);
|
||||
free(ds->sflow_agent);
|
||||
ds->sflow_agent = NULL;
|
||||
@ -516,6 +640,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
||||
SFLDataSource_instance dsi;
|
||||
uint32_t dsIndex;
|
||||
SFLSampler *sampler;
|
||||
SFLPoller *poller;
|
||||
|
||||
ovs_mutex_lock(&mutex);
|
||||
if (sset_is_empty(&options->targets) || !options->sampling_rate) {
|
||||
@ -562,6 +687,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
||||
/* Create agent. */
|
||||
VLOG_INFO("creating sFlow agent %d", options->sub_id);
|
||||
if (ds->sflow_agent) {
|
||||
sflow_global_counters_subid_clear(ds->sflow_agent->subId);
|
||||
sfl_agent_release(ds->sflow_agent);
|
||||
}
|
||||
ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
|
||||
@ -595,6 +721,13 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
|
||||
sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
|
||||
sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
|
||||
|
||||
/* Add a counter poller for the bridge so we can use it to send
|
||||
global counters such as datapath cache hit/miss stats. */
|
||||
poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
|
||||
sflow_agent_get_global_counters);
|
||||
sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
|
||||
sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
|
||||
|
||||
/* Add pollers for the currently known ifindex-ports */
|
||||
HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
|
||||
dpif_sflow_add_poller(ds, dsp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user