From 69d60f9f4e8d8f1930cea759e67037f47b1bdddb Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 14 Aug 2009 14:23:44 -0700 Subject: [PATCH 01/23] bridge: Allow flows based on ARP opcode to be installed. Since we can now distinguish between flows with different ARP opcodes in the kernel, allow these flows to be installed. --- vswitchd/bridge.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 05003e1c1..157dc6866 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -1791,13 +1791,11 @@ compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan, } static bool -is_bcast_arp_reply(const flow_t *flow, const struct ofpbuf *packet) +is_bcast_arp_reply(const flow_t *flow) { - struct arp_eth_header *arp = (struct arp_eth_header *) packet->data; return (flow->dl_type == htons(ETH_TYPE_ARP) - && eth_addr_is_broadcast(flow->dl_dst) - && packet->size >= sizeof(struct arp_eth_header) - && arp->ar_op == ARP_OP_REQUEST); + && flow->nw_proto == ARP_OP_REPLY + && eth_addr_is_broadcast(flow->dl_dst)); } /* If the composed actions may be applied to any packet in the given 'flow', @@ -1915,7 +1913,7 @@ process_flow(struct bridge *br, const flow_t *flow, * to this rule: the host has moved to another switch. */ src_idx = mac_learning_lookup(br->ml, flow->dl_src, vlan); if (src_idx != -1 && src_idx != in_port->port_idx && - (!packet || !is_bcast_arp_reply(flow, packet))) { + !is_bcast_arp_reply(flow)) { goto done; } } @@ -1964,15 +1962,7 @@ process_flow(struct bridge *br, const flow_t *flow, done: compose_actions(br, flow, vlan, in_port, out_port, tags, actions); - /* - * We send out only a single packet, instead of setting up a flow, if the - * packet is an ARP directed to broadcast that arrived on a bonded - * interface. In such a situation ARP requests and replies must be handled - * differently, but OpenFlow unfortunately can't distinguish them. - */ - return (in_port->n_ifaces < 2 - || flow->dl_type != htons(ETH_TYPE_ARP) - || !eth_addr_is_broadcast(flow->dl_dst)); + return true; } /* Careful: 'opp' is in host byte order and opp->port_no is an OFP port From cdd35cff225bf39767ea9e2e535cf2940b0e5127 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Sat, 24 Oct 2009 15:51:31 -0700 Subject: [PATCH 02/23] ovs-vswitchd: Document "bridge/dump-flows" management command --- vswitchd/ovs-vswitchd.8.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index 3750cb308..5c8d6c772 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -99,6 +99,12 @@ These commands manage bridges. Lists each MAC address/VLAN pair learned by the specified \fIbridge\fR, along with the port on which it was learned and the age of the entry, in seconds. +. +.IP "\fBbridge/dump-flows\fR \fIbridge\fR" +Lists all flows in \fIbridge\fR, including those normally hidden to +commands such as \fBovs-ofctl dump-flows\fR. Flows set up by mechanisms +such as in-band control and fail-open are hidden from the controller +since it is not allowed to modify or override them. .SS "BOND COMMANDS" These commands manage bonded ports on an Open vSwitch's bridges. To understand some of these commands, it is important to understand a From 9052790aac60c01195c170d0ed2c7159e1343af7 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Mon, 26 Oct 2009 12:02:02 -0700 Subject: [PATCH 03/23] flow: Differentiate between "port" when printing flows When printing a flow, there were two references to "port": one the interface the packet arrived on and the other the L4 ports. This could be a bit confusing to new users looking at the output of a command such as "ovs-ofctl dump-flows". This commit changes the incoming interface field from "port" to "in_port". --- lib/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/flow.c b/lib/flow.c index c1f6240f0..7d4a1bd4d 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -317,7 +317,7 @@ flow_to_string(const flow_t *flow) void flow_format(struct ds *ds, const flow_t *flow) { - ds_put_format(ds, "port%04x:vlan%d mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" " + ds_put_format(ds, "in_port%04x:vlan%d mac"ETH_ADDR_FMT"->"ETH_ADDR_FMT" " "type%04x proto%"PRId8" ip"IP_FMT"->"IP_FMT" port%d->%d", flow->in_port, ntohs(flow->dl_vlan), ETH_ADDR_ARGS(flow->dl_src), ETH_ADDR_ARGS(flow->dl_dst), From d6fbec6de043d5b9323a06417cc25b0fbba0ff8f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 26 Oct 2009 14:41:32 -0700 Subject: [PATCH 04/23] Spell verb form of "set up" correctly throughout the tree. --- datapath/datapath.c | 2 +- extras/ezio/ovs-switchui.c | 6 ++-- lib/timeval.c | 8 ++--- secchan/in-band.c | 64 +++++++++++++++++++------------------- secchan/ofproto.c | 18 +++++------ utilities/ovs-controller.c | 6 ++-- 6 files changed, 52 insertions(+), 52 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index aea8cc95c..811b4ee99 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -236,7 +236,7 @@ static int create_dp(int dp_idx, const char __user *devnamep) if (!dp->table) goto err_free_dp; - /* Setup our datapath device */ + /* Set up our datapath device. */ dp_dev = dp_dev_create(dp, devname, ODPP_LOCAL); err = PTR_ERR(dp_dev); if (IS_ERR(dp_dev)) diff --git a/extras/ezio/ovs-switchui.c b/extras/ezio/ovs-switchui.c index e83367d0f..7c360f37a 100644 --- a/extras/ezio/ovs-switchui.c +++ b/extras/ezio/ovs-switchui.c @@ -1486,7 +1486,7 @@ static int menu_show(const struct menu *, int start, bool select); static void cmd_shell(const struct dict *); static void cmd_show_version(const struct dict *); static void cmd_configure(const struct dict *); -static void cmd_setup_pki(const struct dict *); +static void cmd_set_up_pki(const struct dict *); static void cmd_browse_status(const struct dict *); static void cmd_show_motto(const struct dict *); @@ -1542,7 +1542,7 @@ menu(const struct dict *dict) menu_add_item(&menu, "Exit"); menu_add_item(&menu, "Show Version")->f = cmd_show_version; menu_add_item(&menu, "Configure")->f = cmd_configure; - menu_add_item(&menu, "Setup PKI")->f = cmd_setup_pki; + menu_add_item(&menu, "Set up PKI")->f = cmd_set_up_pki; if (debug_mode) { menu_add_item(&menu, "Browse Status")->f = cmd_browse_status; menu_add_item(&menu, "Shell")->f = cmd_shell; @@ -2886,7 +2886,7 @@ cmd_configure(const struct dict *dict UNUSED) } static void -cmd_setup_pki(const struct dict *dict UNUSED) +cmd_set_up_pki(const struct dict *dict UNUSED) { static const char def_privkey_file[] = "/etc/openflow-switch/of0-privkey.pem"; diff --git a/lib/timeval.c b/lib/timeval.c index 314b3f430..8ad8d0607 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -43,7 +43,7 @@ static struct timeval now; /* Time at which to die with SIGALRM (if not TIME_MIN). */ static time_t deadline = TIME_MIN; -static void setup_timer(void); +static void set_up_timer(void); static void sigalrm_handler(int); static void refresh_if_ticked(void); static time_t time_add(time_t, time_t); @@ -78,11 +78,11 @@ time_init(void) } /* Set up periodic signal. */ - setup_timer(); + set_up_timer(); } static void -setup_timer(void) +set_up_timer(void) { struct itimerval itimer; @@ -102,7 +102,7 @@ setup_timer(void) void time_postfork(void) { - setup_timer(); + set_up_timer(); } /* Forces a refresh of the current time from the kernel. It is not usually diff --git a/secchan/in-band.c b/secchan/in-band.c index a2197c6a0..c8f77796c 100644 --- a/secchan/in-band.c +++ b/secchan/in-band.c @@ -55,7 +55,7 @@ * * In Open vSwitch, in-band control is implemented as "hidden" flows (in * that they are not visible through OpenFlow) and at a higher priority - * than wildcarded flows can be setup by the controller. This is done + * than wildcarded flows can be set up by the controller. This is done * so that the controller cannot interfere with them and possibly break * connectivity with its switches. It is possible to see all flows, * including in-band ones, with the ovs-appctl "bridge/dump-flows" @@ -182,7 +182,7 @@ * * - Differing Controllers for Switches. All switches must know * the L3 addresses for all the controllers that other switches - * may use, since rules need to be setup to allow traffic related + * may use, since rules need to be set up to allow traffic related * to those controllers through. See rules (f), (g), (h), and (i). * * - Differing Routes for Switches. In order for the switch to @@ -337,8 +337,8 @@ drop_flow(struct in_band *in_band, int rule_idx) /* out_port and fixed_fields are assumed never to change. */ static void -setup_flow(struct in_band *in_band, int rule_idx, const flow_t *flow, - uint32_t fixed_fields, uint16_t out_port) +set_up_flow(struct in_band *in_band, int rule_idx, const flow_t *flow, + uint32_t fixed_fields, uint16_t out_port) { struct ib_rule *rule = &in_band->rules[rule_idx]; @@ -460,28 +460,28 @@ in_band_run(struct in_band *in_band) flow.nw_proto = IP_TYPE_UDP; flow.tp_src = htons(DHCP_CLIENT_PORT); flow.tp_dst = htons(DHCP_SERVER_PORT); - setup_flow(in_band, IBR_FROM_LOCAL_DHCP, &flow, - (OFPFW_IN_PORT | OFPFW_DL_TYPE | OFPFW_DL_SRC - | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST), - OFPP_NORMAL); + set_up_flow(in_band, IBR_FROM_LOCAL_DHCP, &flow, + (OFPFW_IN_PORT | OFPFW_DL_TYPE | OFPFW_DL_SRC + | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST), + OFPP_NORMAL); /* Allow the connection's interface to receive directed ARP traffic. */ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_dst, local_mac, ETH_ADDR_LEN); flow.nw_proto = ARP_OP_REPLY; - setup_flow(in_band, IBR_TO_LOCAL_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), - OFPP_NORMAL); + set_up_flow(in_band, IBR_TO_LOCAL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), + OFPP_NORMAL); /* Allow the connection's interface to be the source of ARP traffic. */ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_src, local_mac, ETH_ADDR_LEN); flow.nw_proto = ARP_OP_REQUEST; - setup_flow(in_band, IBR_FROM_LOCAL_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), - OFPP_NORMAL); + set_up_flow(in_band, IBR_FROM_LOCAL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), + OFPP_NORMAL); } else { drop_flow(in_band, IBR_TO_LOCAL_ARP); drop_flow(in_band, IBR_FROM_LOCAL_ARP); @@ -493,18 +493,18 @@ in_band_run(struct in_band *in_band) flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_dst, remote_mac, ETH_ADDR_LEN); flow.nw_proto = ARP_OP_REPLY; - setup_flow(in_band, IBR_TO_REMOTE_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), - OFPP_NORMAL); + set_up_flow(in_band, IBR_TO_REMOTE_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_DST | OFPFW_NW_PROTO), + OFPP_NORMAL); /* Allow ARP requests from the remote side's MAC. */ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); memcpy(flow.dl_src, remote_mac, ETH_ADDR_LEN); flow.nw_proto = ARP_OP_REQUEST; - setup_flow(in_band, IBR_FROM_REMOTE_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), - OFPP_NORMAL); + set_up_flow(in_band, IBR_FROM_REMOTE_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_DL_SRC | OFPFW_NW_PROTO), + OFPP_NORMAL); } else { drop_flow(in_band, IBR_TO_REMOTE_ARP); drop_flow(in_band, IBR_FROM_REMOTE_ARP); @@ -516,18 +516,18 @@ in_band_run(struct in_band *in_band) flow.dl_type = htons(ETH_TYPE_ARP); flow.nw_proto = ARP_OP_REPLY; flow.nw_dst = controller_ip; - setup_flow(in_band, IBR_TO_CTL_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK), - OFPP_NORMAL); + set_up_flow(in_band, IBR_TO_CTL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK), + OFPP_NORMAL); /* Allow ARP requests from the controller's IP. */ memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_ARP); flow.nw_proto = ARP_OP_REQUEST; flow.nw_src = controller_ip; - setup_flow(in_band, IBR_FROM_CTL_ARP, &flow, - (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK), - OFPP_NORMAL); + set_up_flow(in_band, IBR_FROM_CTL_ARP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK), + OFPP_NORMAL); /* OpenFlow traffic to or from the controller. * @@ -541,12 +541,12 @@ in_band_run(struct in_band *in_band) flow.nw_dst = controller_ip; flow.tp_src = htons(OFP_TCP_PORT); flow.tp_dst = htons(OFP_TCP_PORT); - setup_flow(in_band, IBR_TO_CTL_OFP, &flow, - (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK - | OFPFW_TP_DST), OFPP_NORMAL); - setup_flow(in_band, IBR_FROM_CTL_OFP, &flow, - (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK - | OFPFW_TP_SRC), OFPP_NORMAL); + set_up_flow(in_band, IBR_TO_CTL_OFP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_DST_MASK + | OFPFW_TP_DST), OFPP_NORMAL); + set_up_flow(in_band, IBR_FROM_CTL_OFP, &flow, + (OFPFW_DL_TYPE | OFPFW_NW_PROTO | OFPFW_NW_SRC_MASK + | OFPFW_TP_SRC), OFPP_NORMAL); } else { drop_flow(in_band, IBR_TO_CTL_ARP); drop_flow(in_band, IBR_FROM_CTL_ARP); diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 94c4bfe3f..2453055b8 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -82,7 +82,7 @@ static int xlate_actions(const union ofp_action *in, size_t n_in, const flow_t *flow, struct ofproto *ofproto, const struct ofpbuf *packet, struct odp_actions *out, tag_type *tags, - bool *may_setup_flow); + bool *may_set_up_flow); struct rule { struct cls_rule cr; @@ -1919,7 +1919,7 @@ struct action_xlate_ctx { /* Output. */ struct odp_actions *out; /* Datapath actions. */ tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ - bool may_setup_flow; /* True ordinarily; false if the actions must + bool may_set_up_flow; /* True ordinarily; false if the actions must * be reassessed for every packet. */ }; @@ -2007,7 +2007,7 @@ xlate_output_action(struct action_xlate_ctx *ctx, ctx->out, ctx->tags, ctx->ofproto->aux)) { COVERAGE_INC(ofproto_uninstallable); - ctx->may_setup_flow = false; + ctx->may_set_up_flow = false; } break; case OFPP_FLOOD: @@ -2127,7 +2127,7 @@ static int xlate_actions(const union ofp_action *in, size_t n_in, const flow_t *flow, struct ofproto *ofproto, const struct ofpbuf *packet, - struct odp_actions *out, tag_type *tags, bool *may_setup_flow) + struct odp_actions *out, tag_type *tags, bool *may_set_up_flow) { tag_type no_tags = 0; struct action_xlate_ctx ctx; @@ -2139,17 +2139,17 @@ xlate_actions(const union ofp_action *in, size_t n_in, ctx.packet = packet; ctx.out = out; ctx.tags = tags ? tags : &no_tags; - ctx.may_setup_flow = true; + ctx.may_set_up_flow = true; do_xlate_actions(in, n_in, &ctx); - /* Check with in-band control to see if we're allowed to setup this + /* Check with in-band control to see if we're allowed to set up this * flow. */ if (!in_band_rule_check(ofproto->in_band, flow, out)) { - ctx.may_setup_flow = false; + ctx.may_set_up_flow = false; } - if (may_setup_flow) { - *may_setup_flow = ctx.may_setup_flow; + if (may_set_up_flow) { + *may_set_up_flow = ctx.may_set_up_flow; } if (odp_actions_overflow(out)) { odp_actions_init(out); diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c index 314da1868..30ab52dbe 100644 --- a/utilities/ovs-controller.c +++ b/utilities/ovs-controller.c @@ -53,7 +53,7 @@ struct switch_ { static bool learn_macs = true; /* Set up flows? (If not, every packet is processed at the controller.) */ -static bool setup_flows = true; +static bool set_up_flows = true; /* --max-idle: Maximum idle time, in seconds, before flows expire. */ static int max_idle = 60; @@ -202,7 +202,7 @@ new_switch(struct switch_ *sw, struct vconn *vconn, const char *name) { sw->rconn = rconn_new_from_vconn(name, vconn); sw->lswitch = lswitch_create(sw->rconn, learn_macs, - setup_flows ? max_idle : -1); + set_up_flows ? max_idle : -1); } static int @@ -268,7 +268,7 @@ parse_options(int argc, char *argv[]) break; case 'n': - setup_flows = false; + set_up_flows = false; break; case OPT_MUTE: From 6a07af3678afc64e39a330fc07362a940ef1ecce Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 22 Oct 2009 17:51:05 -0700 Subject: [PATCH 05/23] netflow: Populate NetFlow output interface field. Previously NetFlow expiration messages always contained 0 as the output interface index. This changes that to report the OpenFlow interface instead. Feature #1202 --- secchan/netflow.c | 4 +-- secchan/netflow.h | 6 ++++ secchan/ofproto.c | 62 ++++++++++++++++++++++++++------- secchan/ofproto.h | 4 ++- vswitchd/bridge.c | 21 +++++++---- vswitchd/ovs-vswitchd.conf.5.in | 9 ++++- 6 files changed, 83 insertions(+), 23 deletions(-) diff --git a/secchan/netflow.c b/secchan/netflow.c index 7912b4b88..282fd8342 100644 --- a/secchan/netflow.c +++ b/secchan/netflow.c @@ -196,10 +196,10 @@ netflow_expire(struct netflow *nf, const struct ofexpired *expired) if (nf->add_id_to_iface) { uint16_t iface = (nf->engine_id & 0x7f) << 9; nf_rec->input = htons(iface | (expired->flow.in_port & 0x1ff)); - nf_rec->output = htons(iface); + nf_rec->output = htons(iface | (expired->output_iface & 0x1ff)); } else { nf_rec->input = htons(expired->flow.in_port); - nf_rec->output = htons(0); + nf_rec->output = htons(expired->output_iface); } nf_rec->packet_count = htonl(MIN(expired->packet_count, UINT32_MAX)); nf_rec->byte_count = htonl(MIN(expired->byte_count, UINT32_MAX)); diff --git a/secchan/netflow.h b/secchan/netflow.h index 13be90b49..a5ad3360e 100644 --- a/secchan/netflow.h +++ b/secchan/netflow.h @@ -22,6 +22,12 @@ struct ofexpired; struct svec; +enum netflow_output_ports { + NF_OUT_FLOOD = UINT16_MAX, + NF_OUT_MULTI = UINT16_MAX - 1, + NF_OUT_DROP = UINT16_MAX - 2 +}; + struct netflow *netflow_create(void); void netflow_destroy(struct netflow *); int netflow_set_collectors(struct netflow *, const struct svec *collectors); diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 2453055b8..62a37bf42 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -82,7 +82,7 @@ static int xlate_actions(const union ofp_action *in, size_t n_in, const flow_t *flow, struct ofproto *ofproto, const struct ofpbuf *packet, struct odp_actions *out, tag_type *tags, - bool *may_set_up_flow); + bool *may_set_up_flow, uint16_t *nf_output_iface); struct rule { struct cls_rule cr; @@ -97,6 +97,7 @@ struct rule { uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ uint8_t ip_tos; /* Last-seen IP type-of-service. */ tag_type tags; /* Tags (set only by hooks). */ + uint16_t nf_output_iface; /* Output interface index for NetFlow. */ /* If 'super' is non-NULL, this rule is a subrule, that is, it is an * exact-match rule (having cr.wc.wildcards of 0) generated from the @@ -971,7 +972,7 @@ ofproto_send_packet(struct ofproto *p, const flow_t *flow, int error; error = xlate_actions(actions, n_actions, flow, p, packet, &odp_actions, - NULL, NULL); + NULL, NULL, NULL); if (error) { return error; } @@ -1486,7 +1487,7 @@ rule_execute(struct ofproto *ofproto, struct rule *rule, if (rule->cr.wc.wildcards || !flow_equal(flow, &rule->cr.flow)) { struct rule *super = rule->super ? rule->super : rule; if (xlate_actions(super->actions, super->n_actions, flow, ofproto, - packet, &a, NULL, 0)) { + packet, &a, NULL, 0, NULL)) { return; } actions = a.actions; @@ -1582,7 +1583,8 @@ rule_make_actions(struct ofproto *p, struct rule *rule, super = rule->super ? rule->super : rule; rule->tags = 0; xlate_actions(super->actions, super->n_actions, &rule->cr.flow, p, - packet, &a, &rule->tags, &rule->may_install); + packet, &a, &rule->tags, &rule->may_install, + &rule->nf_output_iface); actions_len = a.n_actions * sizeof *a.actions; if (rule->n_odp_actions != a.n_actions @@ -1700,9 +1702,17 @@ static void rule_post_uninstall(struct ofproto *ofproto, struct rule *rule) { struct rule *super = rule->super; + bool controller_action; rule_account(ofproto, rule, 0); - if (ofproto->netflow && rule->byte_count) { + + /* If the only action is send to the controller then don't report + * NetFlow expiration messages since it is just part of the control + * logic for the network and not real traffic. */ + controller_action = rule->n_odp_actions == 1 && + rule->odp_actions[0].type == ODPAT_CONTROLLER; + + if (ofproto->netflow && rule->byte_count && !controller_action) { struct ofexpired expired; expired.flow = rule->cr.flow; expired.packet_count = rule->packet_count; @@ -1711,6 +1721,7 @@ rule_post_uninstall(struct ofproto *ofproto, struct rule *rule) expired.created = rule->created; expired.tcp_flags = rule->tcp_flags; expired.ip_tos = rule->ip_tos; + expired.output_iface = rule->nf_output_iface; netflow_expire(ofproto->netflow, &expired); } if (super) { @@ -1894,9 +1905,14 @@ handle_set_config(struct ofproto *p, struct ofconn *ofconn, } static void -add_output_group_action(struct odp_actions *actions, uint16_t group) +add_output_group_action(struct odp_actions *actions, uint16_t group, + uint16_t *nf_output_iface) { odp_actions_add(actions, ODPAT_OUTPUT_GROUP)->output_group.group = group; + + if (group == DP_GROUP_ALL || group == DP_GROUP_FLOOD) { + *nf_output_iface = NF_OUT_FLOOD; + } } static void @@ -1921,6 +1937,7 @@ struct action_xlate_ctx { tag_type *tags; /* Tags associated with OFPP_NORMAL actions. */ bool may_set_up_flow; /* True ordinarily; false if the actions must * be reassessed for every packet. */ + uint16_t nf_output_iface; /* Output interface index for NetFlow. */ }; static void do_xlate_actions(const union ofp_action *in, size_t n_in, @@ -1945,6 +1962,7 @@ add_output_action(struct action_xlate_ctx *ctx, uint16_t port) } odp_actions_add(ctx->out, ODPAT_OUTPUT)->output.port = port; + ctx->nf_output_iface = port; } static struct rule * @@ -1994,6 +2012,9 @@ xlate_output_action(struct action_xlate_ctx *ctx, const struct ofp_action_output *oao) { uint16_t odp_port; + uint16_t prev_nf_output_iface = ctx->nf_output_iface; + + ctx->nf_output_iface = NF_OUT_DROP; switch (ntohs(oao->port)) { case OFPP_IN_PORT: @@ -2005,16 +2026,18 @@ xlate_output_action(struct action_xlate_ctx *ctx, case OFPP_NORMAL: if (!ctx->ofproto->ofhooks->normal_cb(ctx->flow, ctx->packet, ctx->out, ctx->tags, + &ctx->nf_output_iface, ctx->ofproto->aux)) { COVERAGE_INC(ofproto_uninstallable); ctx->may_set_up_flow = false; } break; case OFPP_FLOOD: - add_output_group_action(ctx->out, DP_GROUP_FLOOD); + add_output_group_action(ctx->out, DP_GROUP_FLOOD, + &ctx->nf_output_iface); break; case OFPP_ALL: - add_output_group_action(ctx->out, DP_GROUP_ALL); + add_output_group_action(ctx->out, DP_GROUP_ALL, &ctx->nf_output_iface); break; case OFPP_CONTROLLER: add_controller_action(ctx->out, oao); @@ -2029,6 +2052,15 @@ xlate_output_action(struct action_xlate_ctx *ctx, } break; } + + if (prev_nf_output_iface == NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_FLOOD; + } else if (ctx->nf_output_iface == NF_OUT_DROP) { + ctx->nf_output_iface = prev_nf_output_iface; + } else if (prev_nf_output_iface != NF_OUT_DROP && + ctx->nf_output_iface != NF_OUT_FLOOD) { + ctx->nf_output_iface = NF_OUT_MULTI; + } } static void @@ -2127,7 +2159,8 @@ static int xlate_actions(const union ofp_action *in, size_t n_in, const flow_t *flow, struct ofproto *ofproto, const struct ofpbuf *packet, - struct odp_actions *out, tag_type *tags, bool *may_set_up_flow) + struct odp_actions *out, tag_type *tags, bool *may_set_up_flow, + uint16_t *nf_output_iface) { tag_type no_tags = 0; struct action_xlate_ctx ctx; @@ -2140,6 +2173,7 @@ xlate_actions(const union ofp_action *in, size_t n_in, ctx.out = out; ctx.tags = tags ? tags : &no_tags; ctx.may_set_up_flow = true; + ctx.nf_output_iface = NF_OUT_DROP; do_xlate_actions(in, n_in, &ctx); /* Check with in-band control to see if we're allowed to set up this @@ -2151,6 +2185,9 @@ xlate_actions(const union ofp_action *in, size_t n_in, if (may_set_up_flow) { *may_set_up_flow = ctx.may_set_up_flow; } + if (nf_output_iface) { + *nf_output_iface = ctx.nf_output_iface; + } if (odp_actions_overflow(out)) { odp_actions_init(out); return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_TOO_MANY); @@ -2190,7 +2227,7 @@ handle_packet_out(struct ofproto *p, struct ofconn *ofconn, flow_extract(&payload, ofp_port_to_odp_port(ntohs(opo->in_port)), &flow); error = xlate_actions((const union ofp_action *) opo->actions, n_actions, - &flow, p, &payload, &actions, NULL, NULL); + &flow, p, &payload, &actions, NULL, NULL, NULL); if (error) { return error; } @@ -3395,7 +3432,7 @@ pick_fallback_dpid(void) static bool default_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, struct odp_actions *actions, tag_type *tags, - void *ofproto_) + uint16_t *nf_output_iface, void *ofproto_) { struct ofproto *ofproto = ofproto_; int out_port; @@ -3422,9 +3459,10 @@ default_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, /* Determine output port. */ out_port = mac_learning_lookup_tag(ofproto->ml, flow->dl_dst, 0, tags); if (out_port < 0) { - add_output_group_action(actions, DP_GROUP_FLOOD); + add_output_group_action(actions, DP_GROUP_FLOOD, nf_output_iface); } else if (out_port != flow->in_port) { odp_actions_add(actions, ODPAT_OUTPUT)->output.port = out_port; + *nf_output_iface = out_port; } else { /* Drop. */ } diff --git a/secchan/ofproto.h b/secchan/ofproto.h index 398cac4fd..8847deed0 100644 --- a/secchan/ofproto.h +++ b/secchan/ofproto.h @@ -36,6 +36,7 @@ struct ofexpired { long long int created; /* Creation time. */ uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ uint8_t ip_tos; /* Last-seen IP type-of-service. */ + uint16_t output_iface; /* Output interface index. */ }; int ofproto_create(const char *datapath, const struct ofhooks *, void *aux, @@ -99,7 +100,8 @@ struct ofhooks { void (*port_changed_cb)(enum ofp_port_reason, const struct ofp_phy_port *, void *aux); bool (*normal_cb)(const flow_t *, const struct ofpbuf *packet, - struct odp_actions *, tag_type *, void *aux); + struct odp_actions *, tag_type *, + uint16_t *nf_output_iface, void *aux); void (*account_flow_cb)(const flow_t *, const union odp_action *, size_t n_actions, unsigned long long int n_bytes, void *aux); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 157dc6866..fcdd86603 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -48,6 +48,7 @@ #include "port-array.h" #include "proc-net-compat.h" #include "process.h" +#include "secchan/netflow.h" #include "secchan/ofproto.h" #include "socket-util.h" #include "stp.h" @@ -1677,7 +1678,7 @@ port_includes_vlan(const struct port *port, uint16_t vlan) static size_t compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, const struct port *in_port, const struct port *out_port, - struct dst dsts[], tag_type *tags) + struct dst dsts[], tag_type *tags, uint16_t *nf_output_iface) { mirror_mask_t mirrors = in_port->src_mirrors; struct dst *dst = dsts; @@ -1696,7 +1697,9 @@ compose_dsts(const struct bridge *br, const flow_t *flow, uint16_t vlan, dst++; } } + *nf_output_iface = NF_OUT_FLOOD; } else if (out_port && set_dst(dst, flow, in_port, out_port, tags)) { + *nf_output_iface = dst->dp_ifidx; mirrors |= out_port->dst_mirrors; dst++; } @@ -1764,14 +1767,16 @@ print_dsts(const struct dst *dsts, size_t n) static void compose_actions(struct bridge *br, const flow_t *flow, uint16_t vlan, const struct port *in_port, const struct port *out_port, - tag_type *tags, struct odp_actions *actions) + tag_type *tags, struct odp_actions *actions, + uint16_t *nf_output_iface) { struct dst dsts[DP_MAX_PORTS * (MAX_MIRRORS + 1)]; size_t n_dsts; const struct dst *p; uint16_t cur_vlan; - n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags); + n_dsts = compose_dsts(br, flow, vlan, in_port, out_port, dsts, tags, + nf_output_iface); cur_vlan = ntohs(flow->dl_vlan); for (p = dsts; p < &dsts[n_dsts]; p++) { @@ -1804,7 +1809,7 @@ is_bcast_arp_reply(const flow_t *flow) static bool process_flow(struct bridge *br, const flow_t *flow, const struct ofpbuf *packet, struct odp_actions *actions, - tag_type *tags) + tag_type *tags, uint16_t *nf_output_iface) { struct iface *in_iface; struct port *in_port; @@ -1960,7 +1965,8 @@ process_flow(struct bridge *br, const flow_t *flow, } done: - compose_actions(br, flow, vlan, in_port, out_port, tags, actions); + compose_actions(br, flow, vlan, in_port, out_port, tags, actions, + nf_output_iface); return true; } @@ -2005,7 +2011,8 @@ bridge_port_changed_ofhook_cb(enum ofp_port_reason reason, static bool bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, - struct odp_actions *actions, tag_type *tags, void *br_) + struct odp_actions *actions, tag_type *tags, + uint16_t *nf_output_iface, void *br_) { struct bridge *br = br_; @@ -2018,7 +2025,7 @@ bridge_normal_ofhook_cb(const flow_t *flow, const struct ofpbuf *packet, #endif COVERAGE_INC(bridge_process_flow); - return process_flow(br, flow, packet, actions, tags); + return process_flow(br, flow, packet, actions, tags, nf_output_iface); } static void diff --git a/vswitchd/ovs-vswitchd.conf.5.in b/vswitchd/ovs-vswitchd.conf.5.in index 7f989b40f..f998e4900 100644 --- a/vswitchd/ovs-vswitchd.conf.5.in +++ b/vswitchd/ovs-vswitchd.conf.5.in @@ -432,7 +432,14 @@ flows from multiple switches appearing as if they came on the interface, add \fBnetflow.\fIbridge\fB.add-id-to-iface=true\fR to the configuration file. This will place the least significant 7 bits of the engine id into the most significant bits of the ingress and egress interface fields -of flow records. By default, this behavior is disabled. +of flow records. When this option is enabled, a maximum of 508 ports are +supported. By default, this behavior is disabled. + +The egress interface field normally contains the OpenFlow port number, +however, certain port values have special meaning: 0xFFFF indicates +flooding, 0xFFFE is multiple controller-specified output interfaces, and +0xFFFD means that packets from the flow were dropped. If add-id-to-iface +is enabled then these values become 0x1FF, 0x1FE, and 0x1FD respectively. The following syntax sends NetFlow records for \fBmybr\fR to the NetFlow collector \fBnflow.example.com\fR on UDP port \fB9995\fR: From 14a34fe401551c7839306e3cf7a22c5bf83b52a5 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Wed, 28 Oct 2009 12:01:36 -0700 Subject: [PATCH 06/23] xenserver: Print program name on error in dump-vif-details --- xenserver/root_vswitch_scripts_dump-vif-details | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xenserver/root_vswitch_scripts_dump-vif-details b/xenserver/root_vswitch_scripts_dump-vif-details index b0ceb4054..387c31b33 100755 --- a/xenserver/root_vswitch_scripts_dump-vif-details +++ b/xenserver/root_vswitch_scripts_dump-vif-details @@ -62,7 +62,7 @@ def dump_vif_info(domid, devid, vif_ref): if __name__ == '__main__': if (len(sys.argv) != 3): - sys.stderr.write("ERROR: %s \n") + sys.stderr.write("ERROR: %s \n" % sys.argv[0]) sys.exit(1) domid = sys.argv[1] From 3d3d15a0e6e9104f6c29e3ab21f4ea7189dccbab Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Wed, 28 Oct 2009 17:15:57 -0700 Subject: [PATCH 07/23] openflow: Fix endian issues in flow expiration messages A few of the fields in the OpenFlow flow expiration message were being sent in host-byte order. This properly converts them to network. Thanks to David Erickson for catching this! --- secchan/ofproto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 62a37bf42..7fb0c646f 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -3206,9 +3206,9 @@ compose_flow_exp(const struct rule *rule, long long int now, uint8_t reason) flow_to_match(&rule->cr.flow, rule->cr.wc.wildcards, &ofe->match); ofe->priority = htons(rule->cr.priority); ofe->reason = reason; - ofe->duration = (now - rule->created) / 1000; - ofe->packet_count = rule->packet_count; - ofe->byte_count = rule->byte_count; + ofe->duration = htonl((now - rule->created) / 1000); + ofe->packet_count = htonll(rule->packet_count); + ofe->byte_count = htonll(rule->byte_count); return buf; } From d99e262b28c34442045e53df746937432853809c Mon Sep 17 00:00:00 2001 From: Reid Price Date: Fri, 30 Oct 2009 12:39:14 -0700 Subject: [PATCH 08/23] dump-vif-details: Safeguard 'finally' code This makes several minor streamlining changes to dump-vif-details, and moves the try statement in dump_vif_info to exclude session initialization, so that finally will not obscure the original exception with a new exception related to the session variable when logins fail. --- .../root_vswitch_scripts_dump-vif-details | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/xenserver/root_vswitch_scripts_dump-vif-details b/xenserver/root_vswitch_scripts_dump-vif-details index 387c31b33..7ce8bf781 100755 --- a/xenserver/root_vswitch_scripts_dump-vif-details +++ b/xenserver/root_vswitch_scripts_dump-vif-details @@ -25,22 +25,20 @@ def get_vif_ref(domid, devid): # Query XAPI for the information we need using the vif's opaque reference def dump_vif_info(domid, devid, vif_ref): + vif_info = [] + session = XenAPI.xapi_local() + session.xenapi.login_with_password("root", "") try: - session = XenAPI.xapi_local() - session.xenapi.login_with_password("root", "") vif_rec = session.xenapi.VIF.get_record(vif_ref) net_rec = session.xenapi.network.get_record(vif_rec["network"]) - vm_rec = session.xenapi.VM.get_record(vif_rec["VM"]) + vm_uuid = session.xenapi.VM.get_uuid(vif_rec["VM"]) # Data to allow vNetManager to associate VIFs with xapi data - sys.stdout.write('--add=port.vif%s.%s.net-uuid=%s ' - % (domid, devid, net_rec["uuid"])) - sys.stdout.write('--add=port.vif%s.%s.vif-mac=%s ' - % (domid, devid, vif_rec["MAC"])) - sys.stdout.write('--add=port.vif%s.%s.vif-uuid=%s ' - % (domid, devid, vif_rec["uuid"])) - sys.stdout.write('--add=port.vif%s.%s.vm-uuid=%s ' - % (domid, devid, vm_rec["uuid"])) + add_port = '--add=port.vif%s.%s' % (domid, devid) + vif_info.append('%s.net-uuid=%s' % (add_port, net_rec["uuid"])) + vif_info.append('%s.vif-mac=%s' % (add_port, vif_rec["MAC"])) + vif_info.append('%s.vif-uuid=%s' % (add_port, vif_rec["uuid"])) + vif_info.append('%s.vm-uuid=%s' % (add_port, vm_uuid)) # vNetManager needs to know the network UUID(s) associated with # each datapath. Normally interface-reconfigure adds them, but @@ -52,16 +50,17 @@ def dump_vif_info(domid, devid, vif_ref): # There may still be a brief delay between the initial # ovs-vswitchd connection to vNetManager and setting this # configuration variable, but vNetManager can tolerate that. - if len(net_rec['PIFs']) == 0: + if not net_rec['PIFs']: key = 'bridge.%s.xs-network-uuids' % net_rec['bridge'] value = net_rec['uuid'] - sys.stdout.write('--del-match=%s=* ' % key) - sys.stdout.write('--add=%s=%s ' % (key, value)) + vif_info.append('--del-match=%s=*' % key) + vif_info.append('--add=%s=%s' % (key, value)) finally: session.xenapi.session.logout() + print ' '.join(vif_info) if __name__ == '__main__': - if (len(sys.argv) != 3): + if len(sys.argv) != 3: sys.stderr.write("ERROR: %s \n" % sys.argv[0]) sys.exit(1) @@ -71,7 +70,7 @@ if __name__ == '__main__': vif_ref = get_vif_ref(domid, devid) if not vif_ref: sys.stderr.write("ERROR: Could not find interface vif%s.%s\n" - % (domid, devid)) + % (domid, devid)) sys.exit(1) dump_vif_info(domid, devid, vif_ref) From 0c0afbecaace0509d7b2e5b7dee4b4eac7b31e19 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 28 Oct 2009 16:05:57 -0700 Subject: [PATCH 09/23] ofproto: Only zero stats for non exact-match sub-rules. We zero the stats on sub-rules after they expire to prevent them from being counted twice in their super-rule if they are reinstalled. However, for exact-match sub-rules this means that the OpenFlow stats are always zero. This changes that to only zero the stats for non exact match rules. Bug #1911 --- secchan/ofproto.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 7fb0c646f..4266cbf71 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -1731,15 +1731,15 @@ rule_post_uninstall(struct ofproto *ofproto, struct rule *rule) if (rule->packet_count) { super->ip_tos = rule->ip_tos; } - } - /* Reset counters to prevent double counting if the rule ever gets - * reinstalled. */ - rule->packet_count = 0; - rule->byte_count = 0; - rule->accounted_bytes = 0; - rule->tcp_flags = 0; - rule->ip_tos = 0; + /* Reset counters to prevent double counting if the rule ever gets + * reinstalled. */ + rule->packet_count = 0; + rule->byte_count = 0; + rule->accounted_bytes = 0; + rule->tcp_flags = 0; + rule->ip_tos = 0; + } } static void From 6d91c2fb6c169ac84ace876a4c34929cb84e26ae Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 28 Oct 2009 14:36:52 -0700 Subject: [PATCH 10/23] datapath: Allow TCP flags to be cleared. When querying flow stats allow the TCP flags to be reset. Since the datapath ORs together all flags that have previously been seen it is otherwise impossible to determine the set of flags from after a particular time. --- datapath/datapath.c | 15 ++++++++++----- include/openvswitch/datapath-protocol.h | 4 ++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 811b4ee99..1b5f57f8b 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -978,13 +978,18 @@ static int put_actions(const struct sw_flow *flow, struct odp_flow __user *ufp) return 0; } -static int answer_query(struct sw_flow *flow, struct odp_flow __user *ufp) +static int answer_query(struct sw_flow *flow, u32 query_flags, + struct odp_flow __user *ufp) { struct odp_flow_stats stats; unsigned long int flags; spin_lock_irqsave(&flow->lock, flags); get_stats(flow, &stats); + + if (query_flags & ODPFF_ZERO_TCP_FLAGS) { + flow->tcp_flags = 0; + } spin_unlock_irqrestore(&flow->lock, flags); if (__copy_to_user(&ufp->stats, &stats, sizeof(struct odp_flow_stats))) @@ -1022,10 +1027,10 @@ static int del_or_query_flow(struct datapath *dp, * to make sure that we get completely accurate stats, but that * blows our performance, badly. */ dp->n_flows--; - error = answer_query(flow, ufp); + error = answer_query(flow, 0, ufp); flow_deferred_free(flow); } else { - error = answer_query(flow, ufp); + error = answer_query(flow, uf.flags, ufp); } error: @@ -1051,7 +1056,7 @@ static int query_multiple_flows(struct datapath *dp, if (!flow) error = __clear_user(&ufp->stats, sizeof ufp->stats); else - error = answer_query(flow, ufp); + error = answer_query(flow, 0, ufp); if (error) return -EFAULT; } @@ -1072,7 +1077,7 @@ static int list_flow(struct sw_flow *flow, void *cbdata_) if (__copy_to_user(&ufp->key, &flow->key, sizeof flow->key)) return -EFAULT; - error = answer_query(flow, ufp); + error = answer_query(flow, 0, ufp); if (error) return error; diff --git a/include/openvswitch/datapath-protocol.h b/include/openvswitch/datapath-protocol.h index bbc29f6d0..a532d7119 100644 --- a/include/openvswitch/datapath-protocol.h +++ b/include/openvswitch/datapath-protocol.h @@ -167,11 +167,15 @@ struct odp_flow_key { __u8 reserved; /* Pad to 64 bits. */ }; +/* Flags for ODP_FLOW. */ +#define ODPFF_ZERO_TCP_FLAGS (1 << 0) /* Zero the TCP flags. */ + struct odp_flow { struct odp_flow_stats stats; struct odp_flow_key key; union odp_action *actions; __u32 n_actions; + __u32 flags; }; /* Flags for ODP_FLOW_PUT. */ From 0193b2afceb14b2daccc5d91395903f88606253c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 3 Nov 2009 12:25:29 -0800 Subject: [PATCH 11/23] netflow: Implement NetFlow active timeouts. Provides a NetFlow expiration message at regular intervals if the key netflow.
.active-timeout is set. Feature #1317 --- secchan/main.c | 5 +- secchan/netflow.c | 113 +++++++++++++++++++----- secchan/netflow.h | 36 ++++++-- secchan/ofproto.c | 147 ++++++++++++++++++++------------ secchan/ofproto.h | 13 ++- vswitchd/bridge.c | 40 +++++---- vswitchd/ovs-vswitchd.conf.5.in | 24 ++++-- 7 files changed, 263 insertions(+), 115 deletions(-) diff --git a/secchan/main.c b/secchan/main.c index ee29f27be..ac711bad6 100644 --- a/secchan/main.c +++ b/secchan/main.c @@ -118,6 +118,7 @@ main(int argc, char *argv[]) struct ofproto *ofproto; struct ofsettings s; int error; + struct netflow_options nf_options; set_program_name(argv[0]); register_fault_handlers(); @@ -168,7 +169,9 @@ main(int argc, char *argv[]) ovs_fatal(error, "failed to configure controller snooping connections"); } - error = ofproto_set_netflow(ofproto, &s.netflow, 0, 0, false); + memset(&nf_options, 0, sizeof nf_options); + nf_options.collectors = s.netflow; + error = ofproto_set_netflow(ofproto, &nf_options); if (error) { ovs_fatal(error, "failed to configure NetFlow collectors"); } diff --git a/secchan/netflow.c b/secchan/netflow.c index 282fd8342..0505cd33c 100644 --- a/secchan/netflow.c +++ b/secchan/netflow.c @@ -37,6 +37,8 @@ #define NETFLOW_V5_VERSION 5 +static const int ACTIVE_TIMEOUT_DEFAULT = 600; + /* Every NetFlow v5 message contains the header that follows. This is * followed by up to thirty records that describe a terminating flow. * We only send a single record per NetFlow message. @@ -100,6 +102,8 @@ struct netflow { * bits of the interface fields. */ uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */ struct ofpbuf packet; /* NetFlow packet being accumulated. */ + long long int active_timeout; /* Timeout for flows that are still active. */ + long long int reconfig_time; /* When we reconfigured the timeouts. */ }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -160,14 +164,19 @@ open_collector(char *dst) } void -netflow_expire(struct netflow *nf, const struct ofexpired *expired) +netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, + struct ofexpired *expired) { struct netflow_v5_header *nf_hdr; struct netflow_v5_record *nf_rec; struct timeval now; - /* NetFlow only reports on IP packets. */ - if (expired->flow.dl_type != htons(ETH_TYPE_IP)) { + nf_flow->last_expired += nf->active_timeout; + + /* NetFlow only reports on IP packets and we should only report flows + * that actually have traffic. */ + if (expired->flow.dl_type != htons(ETH_TYPE_IP) || + expired->packet_count - nf_flow->packet_count_off == 0) { return; } @@ -196,15 +205,17 @@ netflow_expire(struct netflow *nf, const struct ofexpired *expired) if (nf->add_id_to_iface) { uint16_t iface = (nf->engine_id & 0x7f) << 9; nf_rec->input = htons(iface | (expired->flow.in_port & 0x1ff)); - nf_rec->output = htons(iface | (expired->output_iface & 0x1ff)); + nf_rec->output = htons(iface | (nf_flow->output_iface & 0x1ff)); } else { nf_rec->input = htons(expired->flow.in_port); - nf_rec->output = htons(expired->output_iface); + nf_rec->output = htons(nf_flow->output_iface); } - nf_rec->packet_count = htonl(MIN(expired->packet_count, UINT32_MAX)); - nf_rec->byte_count = htonl(MIN(expired->byte_count, UINT32_MAX)); - nf_rec->init_time = htonl(expired->created - nf->boot_time); - nf_rec->used_time = htonl(MAX(expired->created, expired->used) + nf_rec->packet_count = htonl(MIN(expired->packet_count - + nf_flow->packet_count_off, UINT32_MAX)); + nf_rec->byte_count = htonl(MIN(expired->byte_count - + nf_flow->byte_count_off, UINT32_MAX)); + nf_rec->init_time = htonl(nf_flow->created - nf->boot_time); + nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used) - nf->boot_time); if (expired->flow.nw_proto == IP_TYPE_ICMP) { /* In NetFlow, the ICMP type and code are concatenated and @@ -217,9 +228,15 @@ netflow_expire(struct netflow *nf, const struct ofexpired *expired) nf_rec->src_port = expired->flow.tp_src; nf_rec->dst_port = expired->flow.tp_dst; } - nf_rec->tcp_flags = expired->tcp_flags; + nf_rec->tcp_flags = nf_flow->tcp_flags; nf_rec->ip_proto = expired->flow.nw_proto; - nf_rec->ip_tos = expired->ip_tos; + nf_rec->ip_tos = nf_flow->ip_tos; + + /* Update flow tracking data. */ + nf_flow->created = 0; + nf_flow->packet_count_off = expired->packet_count; + nf_flow->byte_count_off = expired->byte_count; + nf_flow->tcp_flags = 0; /* NetFlow messages are limited to 30 records. */ if (ntohs(nf_hdr->count) >= 30) { @@ -259,15 +276,21 @@ clear_collectors(struct netflow *nf) } int -netflow_set_collectors(struct netflow *nf, const struct svec *collectors_) +netflow_set_options(struct netflow *nf, + const struct netflow_options *nf_options) { struct svec collectors; int error = 0; size_t i; + long long int old_timeout; + + nf->engine_type = nf_options->engine_type; + nf->engine_id = nf_options->engine_id; + nf->add_id_to_iface = nf_options->add_id_to_iface; clear_collectors(nf); - svec_clone(&collectors, collectors_); + svec_clone(&collectors, &nf_options->collectors); svec_sort_unique(&collectors); nf->fds = xmalloc(sizeof *nf->fds * collectors.n); @@ -288,16 +311,19 @@ netflow_set_collectors(struct netflow *nf, const struct svec *collectors_) } svec_destroy(&collectors); - return error; -} -void -netflow_set_engine(struct netflow *nf, uint8_t engine_type, - uint8_t engine_id, bool add_id_to_iface) -{ - nf->engine_type = engine_type; - nf->engine_id = engine_id; - nf->add_id_to_iface = add_id_to_iface; + old_timeout = nf->active_timeout; + if (nf_options->active_timeout != -1) { + nf->active_timeout = nf_options->active_timeout; + } else { + nf->active_timeout = ACTIVE_TIMEOUT_DEFAULT; + } + nf->active_timeout *= 1000; + if (old_timeout != nf->active_timeout) { + nf->reconfig_time = time_msec(); + } + + return error; } struct netflow * @@ -324,3 +350,46 @@ netflow_destroy(struct netflow *nf) free(nf); } } + +void +netflow_flow_clear(struct netflow_flow *nf_flow) +{ + uint16_t output_iface = nf_flow->output_iface; + + memset(nf_flow, 0, sizeof *nf_flow); + nf_flow->output_iface = output_iface; +} + +void +netflow_flow_update_time(struct netflow *nf, struct netflow_flow *nf_flow, + long long int used) +{ + if (!nf_flow->created) { + nf_flow->created = used; + } + + if (!nf || !nf->active_timeout || !nf_flow->last_expired || + nf->reconfig_time > nf_flow->last_expired) { + /* Keep the time updated to prevent a flood of expiration in + * the future. */ + nf_flow->last_expired = time_msec(); + } +} + +void +netflow_flow_update_flags(struct netflow_flow *nf_flow, uint8_t ip_tos, + uint8_t tcp_flags) +{ + nf_flow->ip_tos = ip_tos; + nf_flow->tcp_flags |= tcp_flags; +} + +bool +netflow_active_timeout_expired(struct netflow *nf, struct netflow_flow *nf_flow) +{ + if (nf->active_timeout) { + return time_msec() > nf_flow->last_expired + nf->active_timeout; + } + + return false; +} diff --git a/secchan/netflow.h b/secchan/netflow.h index a5ad3360e..cc7b96057 100644 --- a/secchan/netflow.h +++ b/secchan/netflow.h @@ -18,9 +18,17 @@ #define NETFLOW_H 1 #include "flow.h" +#include "svec.h" struct ofexpired; -struct svec; + +struct netflow_options { + struct svec collectors; + uint8_t engine_type; + uint8_t engine_id; + int active_timeout; + bool add_id_to_iface; +}; enum netflow_output_ports { NF_OUT_FLOOD = UINT16_MAX, @@ -28,12 +36,30 @@ enum netflow_output_ports { NF_OUT_DROP = UINT16_MAX - 2 }; +struct netflow_flow { + long long int last_expired; /* Time this flow last timed out. */ + long long int created; /* Time flow was created since time out. */ + + uint64_t packet_count_off; /* Packet count at last time out. */ + uint64_t byte_count_off; /* Byte count at last time out. */ + + uint16_t output_iface; /* Output interface index. */ + uint8_t ip_tos; /* Last-seen IP type-of-service. */ + uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ +}; + struct netflow *netflow_create(void); void netflow_destroy(struct netflow *); -int netflow_set_collectors(struct netflow *, const struct svec *collectors); -void netflow_set_engine(struct netflow *nf, uint8_t engine_type, - uint8_t engine_id, bool add_id_to_iface); -void netflow_expire(struct netflow *, const struct ofexpired *); +int netflow_set_options(struct netflow *, const struct netflow_options *); +void netflow_expire(struct netflow *, struct netflow_flow *, + struct ofexpired *); void netflow_run(struct netflow *); +void netflow_flow_clear(struct netflow_flow *); +void netflow_flow_update_time(struct netflow *, struct netflow_flow *, + long long int used); +void netflow_flow_update_flags(struct netflow_flow *, uint8_t ip_tos, + uint8_t tcp_flags); +bool netflow_active_timeout_expired(struct netflow *, struct netflow_flow *); + #endif /* netflow.h */ diff --git a/secchan/ofproto.c b/secchan/ofproto.c index 4266cbf71..babf01ed0 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -94,10 +94,8 @@ struct rule { uint64_t packet_count; /* Number of packets received. */ uint64_t byte_count; /* Number of bytes received. */ uint64_t accounted_bytes; /* Number of bytes passed to account_cb. */ - uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ - uint8_t ip_tos; /* Last-seen IP type-of-service. */ tag_type tags; /* Tags (set only by hooks). */ - uint16_t nf_output_iface; /* Output interface index for NetFlow. */ + struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */ /* If 'super' is non-NULL, this rule is a subrule, that is, it is an * exact-match rule (having cr.wc.wildcards of 0) generated from the @@ -146,9 +144,9 @@ rule_is_hidden(const struct rule *rule) return false; } -static struct rule *rule_create(struct rule *super, const union ofp_action *, - size_t n_actions, uint16_t idle_timeout, - uint16_t hard_timeout); +static struct rule *rule_create(struct ofproto *, struct rule *super, + const union ofp_action *, size_t n_actions, + uint16_t idle_timeout, uint16_t hard_timeout); static void rule_free(struct rule *); static void rule_destroy(struct ofproto *, struct rule *); static struct rule *rule_from_cls_rule(const struct cls_rule *); @@ -243,8 +241,10 @@ static uint64_t pick_fallback_dpid(void); static void send_packet_in_miss(struct ofpbuf *, void *ofproto); static void send_packet_in_action(struct ofpbuf *, void *ofproto); static void update_used(struct ofproto *); -static void update_stats(struct rule *, const struct odp_flow_stats *); +static void update_stats(struct ofproto *, struct rule *, + const struct odp_flow_stats *); static void expire_rule(struct cls_rule *, void *ofproto); +static void active_timeout(struct ofproto *ofproto, struct rule *rule); static bool revalidate_rule(struct ofproto *p, struct rule *rule); static void revalidate_cb(struct cls_rule *rule_, void *p_); @@ -543,16 +543,14 @@ ofproto_set_snoops(struct ofproto *ofproto, const struct svec *snoops) } int -ofproto_set_netflow(struct ofproto *ofproto, const struct svec *collectors, - uint8_t engine_type, uint8_t engine_id, bool add_id_to_iface) +ofproto_set_netflow(struct ofproto *ofproto, + const struct netflow_options *nf_options) { - if (collectors && collectors->n) { + if (nf_options->collectors.n) { if (!ofproto->netflow) { ofproto->netflow = netflow_create(); } - netflow_set_engine(ofproto->netflow, engine_type, engine_id, - add_id_to_iface); - return netflow_set_collectors(ofproto->netflow, collectors); + return netflow_set_options(ofproto->netflow, nf_options); } else { netflow_destroy(ofproto->netflow); ofproto->netflow = NULL; @@ -991,7 +989,7 @@ ofproto_add_flow(struct ofproto *p, int idle_timeout) { struct rule *rule; - rule = rule_create(NULL, actions, n_actions, + rule = rule_create(p, NULL, actions, n_actions, idle_timeout >= 0 ? idle_timeout : 5 /* XXX */, 0); cls_rule_from_flow(&rule->cr, flow, wildcards, priority); rule_insert(p, rule, NULL, 0); @@ -1383,7 +1381,7 @@ ofconn_wait(struct ofconn *ofconn) /* Caller is responsible for initializing the 'cr' member of the returned * rule. */ static struct rule * -rule_create(struct rule *super, +rule_create(struct ofproto *ofproto, struct rule *super, const union ofp_action *actions, size_t n_actions, uint16_t idle_timeout, uint16_t hard_timeout) { @@ -1399,6 +1397,9 @@ rule_create(struct rule *super, } rule->n_actions = n_actions; rule->actions = xmemdup(actions, n_actions * sizeof *actions); + netflow_flow_clear(&rule->nf_flow); + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->created); + return rule; } @@ -1502,8 +1503,9 @@ rule_execute(struct ofproto *ofproto, struct rule *rule, actions, n_actions, packet)) { struct odp_flow_stats stats; flow_extract_stats(flow, packet, &stats); - update_stats(rule, &stats); + update_stats(ofproto, rule, &stats); rule->used = time_msec(); + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, rule->used); } } @@ -1545,7 +1547,7 @@ static struct rule * rule_create_subrule(struct ofproto *ofproto, struct rule *rule, const flow_t *flow) { - struct rule *subrule = rule_create(rule, NULL, 0, + struct rule *subrule = rule_create(ofproto, rule, NULL, 0, rule->idle_timeout, rule->hard_timeout); COVERAGE_INC(ofproto_subrule_create); cls_rule_from_flow(&subrule->cr, flow, 0, @@ -1584,7 +1586,7 @@ rule_make_actions(struct ofproto *p, struct rule *rule, rule->tags = 0; xlate_actions(super->actions, super->n_actions, &rule->cr.flow, p, packet, &a, &rule->tags, &rule->may_install, - &rule->nf_output_iface); + &rule->nf_flow.output_iface); actions_len = a.n_actions * sizeof *a.actions; if (rule->n_odp_actions != a.n_actions @@ -1623,7 +1625,7 @@ rule_install(struct ofproto *p, struct rule *rule, struct rule *displaced_rule) &put)) { rule->installed = true; if (displaced_rule) { - update_stats(rule, &put.flow.stats); + update_stats(p, rule, &put.flow.stats); rule_post_uninstall(p, displaced_rule); } } @@ -1690,7 +1692,7 @@ rule_uninstall(struct ofproto *p, struct rule *rule) odp_flow.actions = NULL; odp_flow.n_actions = 0; if (!dpif_flow_del(&p->dpif, &odp_flow)) { - update_stats(rule, &odp_flow.stats); + update_stats(p, rule, &odp_flow.stats); } rule->installed = false; @@ -1698,47 +1700,50 @@ rule_uninstall(struct ofproto *p, struct rule *rule) } } +static bool +is_controller_rule(struct rule *rule) +{ + /* If the only action is send to the controller then don't report + * NetFlow expiration messages since it is just part of the control + * logic for the network and not real traffic. */ + + if (rule && rule->super) { + struct rule *super = rule->super; + + return super->n_actions == 1 && + super->actions[0].type == htons(OFPAT_OUTPUT) && + super->actions[0].output.port == htons(OFPP_CONTROLLER); + } + + return false; +} + static void rule_post_uninstall(struct ofproto *ofproto, struct rule *rule) { struct rule *super = rule->super; - bool controller_action; rule_account(ofproto, rule, 0); - /* If the only action is send to the controller then don't report - * NetFlow expiration messages since it is just part of the control - * logic for the network and not real traffic. */ - controller_action = rule->n_odp_actions == 1 && - rule->odp_actions[0].type == ODPAT_CONTROLLER; - - if (ofproto->netflow && rule->byte_count && !controller_action) { + if (ofproto->netflow && !is_controller_rule(rule)) { struct ofexpired expired; expired.flow = rule->cr.flow; expired.packet_count = rule->packet_count; expired.byte_count = rule->byte_count; expired.used = rule->used; - expired.created = rule->created; - expired.tcp_flags = rule->tcp_flags; - expired.ip_tos = rule->ip_tos; - expired.output_iface = rule->nf_output_iface; - netflow_expire(ofproto->netflow, &expired); + netflow_expire(ofproto->netflow, &rule->nf_flow, &expired); } if (super) { super->packet_count += rule->packet_count; super->byte_count += rule->byte_count; - super->tcp_flags |= rule->tcp_flags; - if (rule->packet_count) { - super->ip_tos = rule->ip_tos; - } /* Reset counters to prevent double counting if the rule ever gets * reinstalled. */ rule->packet_count = 0; rule->byte_count = 0; rule->accounted_bytes = 0; - rule->tcp_flags = 0; - rule->ip_tos = 0; + + netflow_flow_clear(&rule->nf_flow); } } @@ -2717,23 +2722,26 @@ msec_from_nsec(uint64_t sec, uint32_t nsec) } static void -update_time(struct rule *rule, const struct odp_flow_stats *stats) +update_time(struct ofproto *ofproto, struct rule *rule, + const struct odp_flow_stats *stats) { long long int used = msec_from_nsec(stats->used_sec, stats->used_nsec); if (used > rule->used) { rule->used = used; + netflow_flow_update_time(ofproto->netflow, &rule->nf_flow, used); } } static void -update_stats(struct rule *rule, const struct odp_flow_stats *stats) +update_stats(struct ofproto *ofproto, struct rule *rule, + const struct odp_flow_stats *stats) { - update_time(rule, stats); - rule->packet_count += stats->n_packets; - rule->byte_count += stats->n_bytes; - rule->tcp_flags |= stats->tcp_flags; if (stats->n_packets) { - rule->ip_tos = stats->ip_tos; + update_time(ofproto, rule, stats); + rule->packet_count += stats->n_packets; + rule->byte_count += stats->n_bytes; + netflow_flow_update_flags(&rule->nf_flow, stats->ip_tos, + stats->tcp_flags); } } @@ -2746,7 +2754,7 @@ add_flow(struct ofproto *p, struct ofconn *ofconn, uint16_t in_port; int error; - rule = rule_create(NULL, (const union ofp_action *) ofm->actions, + rule = rule_create(p, NULL, (const union ofp_action *) ofm->actions, n_actions, ntohs(ofm->idle_timeout), ntohs(ofm->hard_timeout)); cls_rule_from_match(&rule->cr, &ofm->match, ntohs(ofm->priority)); @@ -3271,18 +3279,15 @@ expire_rule(struct cls_rule *cls_rule, void *p_) ? rule->used + rule->idle_timeout * 1000 : LLONG_MAX); expire = MIN(hard_expire, idle_expire); - if (expire == LLONG_MAX) { - if (rule->installed && time_msec() >= rule->used + 5000) { - uninstall_idle_flow(p, rule); - } - return; - } now = time_msec(); if (now < expire) { if (rule->installed && now >= rule->used + 5000) { uninstall_idle_flow(p, rule); + } else if (!rule->cr.wc.wildcards) { + active_timeout(p, rule); } + return; } @@ -3303,6 +3308,40 @@ expire_rule(struct cls_rule *cls_rule, void *p_) rule_remove(p, rule); } +static void +active_timeout(struct ofproto *ofproto, struct rule *rule) +{ + if (ofproto->netflow && !is_controller_rule(rule) && + netflow_active_timeout_expired(ofproto->netflow, &rule->nf_flow)) { + struct ofexpired expired; + struct odp_flow odp_flow; + + /* Get updated flow stats. */ + memset(&odp_flow, 0, sizeof odp_flow); + odp_flow.key = rule->cr.flow; + odp_flow.flags = ODPFF_ZERO_TCP_FLAGS; + dpif_flow_get(&ofproto->dpif, &odp_flow); + + if (odp_flow.stats.n_packets) { + update_time(ofproto, rule, &odp_flow.stats); + netflow_flow_update_flags(&rule->nf_flow, odp_flow.stats.ip_tos, + odp_flow.stats.tcp_flags); + } + + expired.flow = rule->cr.flow; + expired.packet_count = rule->packet_count + + odp_flow.stats.n_packets; + expired.byte_count = rule->byte_count + odp_flow.stats.n_bytes; + expired.used = rule->used; + + netflow_expire(ofproto->netflow, &rule->nf_flow, &expired); + + /* Schedule us to send the accumulated records once we have + * collected all of them. */ + poll_immediate_wake(); + } +} + static void update_used(struct ofproto *p) { @@ -3328,7 +3367,7 @@ update_used(struct ofproto *p) continue; } - update_time(rule, &f->stats); + update_time(p, rule, &f->stats); rule_account(p, rule, f->stats.n_bytes); } free(flows); diff --git a/secchan/ofproto.h b/secchan/ofproto.h index 8847deed0..50dd5d5b4 100644 --- a/secchan/ofproto.h +++ b/secchan/ofproto.h @@ -21,6 +21,7 @@ #include #include #include "flow.h" +#include "netflow.h" #include "tag.h" struct odp_actions; @@ -30,13 +31,9 @@ struct svec; struct ofexpired { flow_t flow; - uint64_t packet_count; /* Packets from *expired* subrules. */ - uint64_t byte_count; /* Bytes from *expired* subrules. */ + uint64_t packet_count; /* Packets from subrules. */ + uint64_t byte_count; /* Bytes from subrules. */ long long int used; /* Last-used time (0 if never used). */ - long long int created; /* Creation time. */ - uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ - uint8_t ip_tos; /* Last-seen IP type-of-service. */ - uint16_t output_iface; /* Output interface index. */ }; int ofproto_create(const char *datapath, const struct ofhooks *, void *aux, @@ -63,8 +60,8 @@ int ofproto_set_discovery(struct ofproto *, bool discovery, int ofproto_set_controller(struct ofproto *, const char *controller); int ofproto_set_listeners(struct ofproto *, const struct svec *listeners); int ofproto_set_snoops(struct ofproto *, const struct svec *snoops); -int ofproto_set_netflow(struct ofproto *, const struct svec *collectors, - uint8_t engine_type, uint8_t engine_id, bool add_id_to_iface); +int ofproto_set_netflow(struct ofproto *, + const struct netflow_options *nf_options); void ofproto_set_failure(struct ofproto *, bool fail_open); void ofproto_set_rate_limit(struct ofproto *, int rate_limit, int burst_limit); int ofproto_set_stp(struct ofproto *, bool enable_stp); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index fcdd86603..ad9b46ef3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -495,10 +495,7 @@ bridge_reconfigure(void) uint64_t dpid; struct iface *local_iface = NULL; const char *devname; - uint8_t engine_type = br->dpif.minor; - uint8_t engine_id = br->dpif.minor; - bool add_id_to_iface = false; - struct svec nf_hosts; + struct netflow_options nf_options; bridge_fetch_dp_ifaces(br); for (i = 0; i < br->n_ports; ) { @@ -543,35 +540,46 @@ bridge_reconfigure(void) ofproto_set_datapath_id(br->ofproto, dpid); /* Set NetFlow configuration on this bridge. */ + memset(&nf_options, 0, sizeof nf_options); + nf_options.engine_type = br->dpif.minor; + nf_options.engine_id = br->dpif.minor; + nf_options.active_timeout = -1; + if (cfg_has("netflow.%s.engine-type", br->name)) { - engine_type = cfg_get_int(0, "netflow.%s.engine-type", + nf_options.engine_type = cfg_get_int(0, "netflow.%s.engine-type", br->name); } if (cfg_has("netflow.%s.engine-id", br->name)) { - engine_id = cfg_get_int(0, "netflow.%s.engine-id", br->name); + nf_options.engine_id = cfg_get_int(0, "netflow.%s.engine-id", + br->name); + } + if (cfg_has("netflow.%s.active-timeout", br->name)) { + nf_options.active_timeout = cfg_get_int(0, + "netflow.%s.active-timeout", + br->name); } if (cfg_has("netflow.%s.add-id-to-iface", br->name)) { - add_id_to_iface = cfg_get_bool(0, "netflow.%s.add-id-to-iface", - br->name); + nf_options.add_id_to_iface = cfg_get_bool(0, + "netflow.%s.add-id-to-iface", + br->name); } - if (add_id_to_iface && engine_id > 0x7f) { + if (nf_options.add_id_to_iface && nf_options.engine_id > 0x7f) { VLOG_WARN("bridge %s: netflow port mangling may conflict with " "another vswitch, choose an engine id less than 128", br->name); } - if (add_id_to_iface && br->n_ports > 0x1ff) { + if (nf_options.add_id_to_iface && br->n_ports > 508) { VLOG_WARN("bridge %s: netflow port mangling will conflict with " - "another port when 512 or more ports are used", + "another port when more than 508 ports are used", br->name); } - svec_init(&nf_hosts); - cfg_get_all_keys(&nf_hosts, "netflow.%s.host", br->name); - if (ofproto_set_netflow(br->ofproto, &nf_hosts, engine_type, - engine_id, add_id_to_iface)) { + svec_init(&nf_options.collectors); + cfg_get_all_keys(&nf_options.collectors, "netflow.%s.host", br->name); + if (ofproto_set_netflow(br->ofproto, &nf_options)) { VLOG_ERR("bridge %s: problem setting netflow collectors", br->name); } - svec_destroy(&nf_hosts); + svec_destroy(&nf_options.collectors); /* Update the controller and related settings. It would be more * straightforward to call this from bridge_reconfigure_one(), but we diff --git a/vswitchd/ovs-vswitchd.conf.5.in b/vswitchd/ovs-vswitchd.conf.5.in index f998e4900..7c3c87f3e 100644 --- a/vswitchd/ovs-vswitchd.conf.5.in +++ b/vswitchd/ovs-vswitchd.conf.5.in @@ -412,18 +412,24 @@ port.eth1.ingress.policing-burst=20 .fi .SS "NetFlow v5 Flow Logging" -NetFlow is a protocol that exports a number of details about terminating -IP flows, such as the principals involved and duration. A bridge may be -configured to send NetFlow v5 records to NetFlow collectors when flows -end. To enable, define the key \fBnetflow.\fIbridge\fB.host\fR for each -collector in the form \fIip\fB:\fIport\fR. Records from \fIbridge\fR +NetFlow is a protocol that exports a number of details about terminating +IP flows, such as the principals involved and duration. A bridge may be +configured to send NetFlow v5 records to NetFlow collectors when flows +end. To enable, define the key \fBnetflow.\fIbridge\fB.host\fR for each +collector in the form \fIip\fB:\fIport\fR. Records from \fIbridge\fR will be sent to each \fIip\fR on UDP \fIport\fR. The \fIip\fR must be specified numerically, not as a DNS name. -The NetFlow messages will use the datapath index for the engine type and id. -This can be overridden with the \fBnetflow.\fIbridge\fB.engine-type\fR and +In addition to terminating flows, NetFlow can also send records at a set +interval for flows that are still active. This interval can be configured +by defining the key \fBnetflow.\fIbridge\fB\.active-timeout\fR. The value +is in seconds. An active timeout of 0 will disable this functionality. By +default there is timeout value of 600 seconds. + +The NetFlow messages will use the datapath index for the engine type and id. +This can be overridden with the \fBnetflow.\fIbridge\fB.engine-type\fR and \fBnetflow.\fIbridge\fB.engine-id\fR, respectively. Each takes a value -between 0 and 255, inclusive. +between 0 and 255, inclusive. Many NetFlow collectors do not expect multiple virtual switches to be sending messages from the same host, and they do not store the engine @@ -431,7 +437,7 @@ information which could be used to disambiguate the traffic. To prevent flows from multiple switches appearing as if they came on the interface, add \fBnetflow.\fIbridge\fB.add-id-to-iface=true\fR to the configuration file. This will place the least significant 7 bits of the engine id -into the most significant bits of the ingress and egress interface fields +into the most significant bits of the ingress and egress interface fields of flow records. When this option is enabled, a maximum of 508 ports are supported. By default, this behavior is disabled. From 30746a1b6a9758a58d6f9a7a85830512aa0e114f Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 3 Nov 2009 13:56:46 -0800 Subject: [PATCH 12/23] Mention running boot.sh when pulling sources from Git When the sources are pulled directly from Git, it is necessary to run "./boot.sh" before "./configure" can be run. This commit documents that useful bit of information. --- INSTALL.Linux | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/INSTALL.Linux b/INSTALL.Linux index 268394098..aadac73f4 100644 --- a/INSTALL.Linux +++ b/INSTALL.Linux @@ -92,7 +92,12 @@ Building and Installing Open vSwitch for Linux Once you have installed all the prerequisites listed above in the Base Prerequisites section, follow the procedure below to build. -1. In the top source directory, configure the package by running the +1. If you pulled the sources directly from an Open vSwitch Git tree, + run boot.sh in the top source directory: + + % ./boot.sh + +2. In the top source directory, configure the package by running the configure script. You can usually invoke configure without any arguments: @@ -128,16 +133,16 @@ Prerequisites section, follow the procedure below to build. additional environment variables. For a full list, invoke configure with the --help option. -2. Run make in the top source directory: +3. Run make in the top source directory: % make -3. Become root by running "su" or another program. +4. Become root by running "su" or another program. -4. Run "make install" to install the executables and manpages into the +5. Run "make install" to install the executables and manpages into the running system, by default under /usr/local. -5. If you built kernel modules, you may load them with "insmod", e.g.: +6. If you built kernel modules, you may load them with "insmod", e.g.: % insmod datapath/linux-2.6/openvswitch_mod.ko From 2a577bd807023009afab02d975787d67389c4a27 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Tue, 3 Nov 2009 21:19:47 -0800 Subject: [PATCH 13/23] xenserver: Fix issue with deleting network UUID on VLAN destruction In XenServer, a VLAN is considered an additional network with its own UUID. The interface-reconfigure script properly adds this network UUID to the configuration script, but commit 774428 removed the code that would remove this information on VLAN destruction. Ian Campbell was the author of that commit and felt that reverting this part was safe. Bug #1973 --- xenserver/opt_xensource_libexec_interface-reconfigure | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xenserver/opt_xensource_libexec_interface-reconfigure b/xenserver/opt_xensource_libexec_interface-reconfigure index 82c386368..59036a561 100755 --- a/xenserver/opt_xensource_libexec_interface-reconfigure +++ b/xenserver/opt_xensource_libexec_interface-reconfigure @@ -1518,10 +1518,10 @@ def action_down(pif): ifdown(ipdev) if dp: - #nw = db.get_pif_record(pif)['network'] - #nwrec = db.get_network_record(nw) - #cfgmod_argv += ['# deconfigure xs-network-uuids'] - #cfgmod_argv += ['--del-entry=bridge.%s.xs-network-uuids=%s' % (bridge,nwrec['uuid'])] + nw = db.get_pif_record(pif)['network'] + nwrec = db.get_network_record(nw) + cfgmod_argv += ['# deconfigure xs-network-uuids'] + cfgmod_argv += ['--del-entry=bridge.%s.xs-network-uuids=%s' % (bridge,nwrec['uuid'])] log("deconfigure ipdev %s on %s" % (ipdev,bridge)) cfgmod_argv += ["# deconfigure ipdev %s" % ipdev] From cae7a4b90a55cbfd4cfd23c06f9f09cd429ab4c0 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Wed, 4 Nov 2009 13:21:07 -0800 Subject: [PATCH 14/23] ovs-ofctl: Fix use-after-free error in del-flows command. --- utilities/ovs-ofctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 761141597..9efd484ff 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -948,11 +948,15 @@ static void do_del_flows(const struct settings *s, int argc, char *argv[]) uint16_t out_port; struct ofpbuf *buffer; struct ofp_flow_mod *ofm; + struct ofp_match match; - /* Parse and send. */ - ofm = make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer); - str_to_flow(argc > 2 ? argv[2] : "", &ofm->match, NULL, NULL, - &out_port, &priority, NULL, NULL); + /* Parse and send. str_to_flow() will expand and reallocate the data in + * 'buffer', so we can't keep pointers to across the str_to_flow() call. */ + make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer); + str_to_flow(argc > 2 ? argv[2] : "", &match, buffer, + NULL, &out_port, &priority, NULL, NULL); + ofm = buffer->data; + ofm->match = match; if (s->strict) { ofm->command = htons(OFPFC_DELETE_STRICT); } else { From ae602adc43835e56ce388d11c55aa2599a6fa8ad Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Wed, 4 Nov 2009 23:40:18 -0800 Subject: [PATCH 15/23] Revert "ovs-ofctl: Fix use-after-free error in del-flows command." This reverts commit cae7a4b90a55cbfd4cfd23c06f9f09cd429ab4c0. This commit forced the user to specify an action when deleting a flow, which is not desirable. The change was not actually needed, as the buffer is never passed to str_to_flow() in the original code. --- utilities/ovs-ofctl.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 9efd484ff..761141597 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -948,15 +948,11 @@ static void do_del_flows(const struct settings *s, int argc, char *argv[]) uint16_t out_port; struct ofpbuf *buffer; struct ofp_flow_mod *ofm; - struct ofp_match match; - /* Parse and send. str_to_flow() will expand and reallocate the data in - * 'buffer', so we can't keep pointers to across the str_to_flow() call. */ - make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer); - str_to_flow(argc > 2 ? argv[2] : "", &match, buffer, - NULL, &out_port, &priority, NULL, NULL); - ofm = buffer->data; - ofm->match = match; + /* Parse and send. */ + ofm = make_openflow(sizeof *ofm, OFPT_FLOW_MOD, &buffer); + str_to_flow(argc > 2 ? argv[2] : "", &ofm->match, NULL, NULL, + &out_port, &priority, NULL, NULL); if (s->strict) { ofm->command = htons(OFPFC_DELETE_STRICT); } else { From 094e151456b597c278f3b416523cf36b5e8e7f99 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 6 Nov 2009 13:26:42 -0800 Subject: [PATCH 16/23] netflow: Only query stats of installed flows. NetFlow active timeouts was querying the stats of all exact match flows that had reached a certain age including those that could not be installed. This was not harmful but it was wasteful and produced log spew. This changes it to only query the flows that are actually installed. Bug #2252 --- secchan/ofproto.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/secchan/ofproto.c b/secchan/ofproto.c index babf01ed0..cdb94b504 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -3318,14 +3318,16 @@ active_timeout(struct ofproto *ofproto, struct rule *rule) /* Get updated flow stats. */ memset(&odp_flow, 0, sizeof odp_flow); - odp_flow.key = rule->cr.flow; - odp_flow.flags = ODPFF_ZERO_TCP_FLAGS; - dpif_flow_get(&ofproto->dpif, &odp_flow); + if (rule->installed) { + odp_flow.key = rule->cr.flow; + odp_flow.flags = ODPFF_ZERO_TCP_FLAGS; + dpif_flow_get(&ofproto->dpif, &odp_flow); - if (odp_flow.stats.n_packets) { - update_time(ofproto, rule, &odp_flow.stats); - netflow_flow_update_flags(&rule->nf_flow, odp_flow.stats.ip_tos, - odp_flow.stats.tcp_flags); + if (odp_flow.stats.n_packets) { + update_time(ofproto, rule, &odp_flow.stats); + netflow_flow_update_flags(&rule->nf_flow, odp_flow.stats.ip_tos, + odp_flow.stats.tcp_flags); + } } expired.flow = rule->cr.flow; From 7559c396688cbbc6fab80897bbb4b993646bcf54 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 6 Nov 2009 10:22:55 -0800 Subject: [PATCH 17/23] backtrace: Suppress dumb GCC warning on x86-64. Without this change GCC warns "use of assignment suppression and length modifier together in scanf format", which doesn't actually point out any real problem (and why would it? Google turns up nothing interesting). --- lib/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/backtrace.c b/lib/backtrace.c index 0999a081e..42ab8c48b 100644 --- a/lib/backtrace.c +++ b/lib/backtrace.c @@ -42,7 +42,7 @@ get_max_stack(void) for (line_number = 1; fgets(line, sizeof line, f); line_number++) { if (strstr(line, "[stack]")) { uintptr_t end; - if (sscanf(line, "%*"SCNxPTR"-%"SCNxPTR, &end) != 1) { + if (sscanf(line, "%*x-%"SCNxPTR, &end) != 1) { VLOG_WARN("%s:%d: parse error", file_name, line_number); continue; } From cc56746aed6d53046b17b01756362e566734241c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 6 Nov 2009 10:25:50 -0800 Subject: [PATCH 18/23] backtrace: Avoid GCC warning on x86-64. The portable implementation of stack_low(), which before this commit is used on x86-64, provokes a warning from GCC that cannot be disabled. We already have an i386-specific implementation that does not warn; this commit adds a corresponding implementation for x86-64 to avoid the warning there too. --- lib/backtrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/backtrace.c b/lib/backtrace.c index 42ab8c48b..2f4780932 100644 --- a/lib/backtrace.c +++ b/lib/backtrace.c @@ -73,6 +73,10 @@ stack_low(void) uintptr_t low; asm("movl %%esp,%0" : "=g" (low)); return low; +#elif __x86_64__ + uintptr_t low; + asm("movq %%rsp,%0" : "=g" (low)); + return low; #else /* This causes a warning in GCC that cannot be disabled, so use it only on * non-x86. */ From 2886875a385df582354ac916431b278f2fe698e0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 6 Nov 2009 10:43:50 -0800 Subject: [PATCH 19/23] Fix incorrect printf format specifiers. GCC reported these during a 64-bit build. --- lib/cfg.c | 2 +- lib/dpif.c | 2 +- lib/fault.c | 2 +- lib/leak-checker.c | 2 +- lib/odp-util.c | 5 +++-- lib/pcap.c | 2 +- lib/poll-loop.c | 3 ++- lib/vconn.c | 12 ++++++------ secchan/ofproto.c | 4 ++-- utilities/ovs-dpctl.c | 8 +++++--- utilities/ovs-ofctl.c | 2 +- vswitchd/bridge.c | 2 +- vswitchd/mgmt.c | 2 +- 13 files changed, 26 insertions(+), 22 deletions(-) diff --git a/lib/cfg.c b/lib/cfg.c index 225827ef5..d0170b60e 100644 --- a/lib/cfg.c +++ b/lib/cfg.c @@ -136,7 +136,7 @@ cfg_set_file(const char *file_name) slash = strrchr(file_name, '/'); if (slash) { lock_name = xasprintf("%.*s/.%s.~lock~", - slash - file_name, file_name, slash + 1); + (int) (slash - file_name), file_name, slash + 1); } else { lock_name = xasprintf(".%s.~lock~", file_name); } diff --git a/lib/dpif.c b/lib/dpif.c index 4475913ce..78e8ec3cf 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -706,7 +706,7 @@ dpif_recv(struct dpif *dpif, struct ofpbuf **bufp) return 0; } else { VLOG_WARN_RL(&error_rl, "dp%u: discarding message truncated " - "from %zu bytes to %d", + "from %"PRIu32" bytes to %d", dpif->minor, msg->length, retval); error = ERANGE; } diff --git a/lib/fault.c b/lib/fault.c index 3882eff7c..14e229ed6 100644 --- a/lib/fault.c +++ b/lib/fault.c @@ -54,7 +54,7 @@ log_backtrace(void) if (!dladdr(frame[1], &addrinfo) || !addrinfo.dli_sname) { fprintf(stderr, " 0x%08"PRIxPTR"\n", (uintptr_t) frame[1]); } else { - fprintf(stderr, " 0x%08"PRIxPTR" (%s+0x%x)\n", + fprintf(stderr, " 0x%08"PRIxPTR" (%s+0x%tx)\n", (uintptr_t) frame[1], addrinfo.dli_sname, (char *) frame[1] - (char *) addrinfo.dli_saddr); } diff --git a/lib/leak-checker.c b/lib/leak-checker.c index c2c4348f5..8d256bc8c 100644 --- a/lib/leak-checker.c +++ b/lib/leak-checker.c @@ -141,7 +141,7 @@ log_callers(const char *format, ...) putc(':', file); backtrace_capture(&backtrace); for (i = 0; i < backtrace.n_frames; i++) { - fprintf(file, " 0x%x", backtrace.frames[i]); + fprintf(file, " 0x%"PRIxPTR, backtrace.frames[i]); } putc('\n', file); } diff --git a/lib/odp-util.c b/lib/odp-util.c index 21bc9a516..a80016209 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -111,8 +111,9 @@ format_odp_actions(struct ds *ds, const union odp_action *actions, void format_odp_flow_stats(struct ds *ds, const struct odp_flow_stats *s) { - ds_put_format(ds, "packets:%"PRIu64", bytes:%"PRIu64", used:", - s->n_packets, s->n_bytes); + ds_put_format(ds, "packets:%llu, bytes:%llu, used:", + (unsigned long long int) s->n_packets, + (unsigned long long int) s->n_bytes); if (s->used_sec) { long long int used = s->used_sec * 1000 + s->used_nsec / 1000000; ds_put_format(ds, "%.3fs", (time_msec() - used) / 1000.0); diff --git a/lib/pcap.c b/lib/pcap.c index 4330c575d..967bb5c34 100644 --- a/lib/pcap.c +++ b/lib/pcap.c @@ -128,7 +128,7 @@ pcap_read(FILE *file, struct ofpbuf **bufp) ((len & 0x0000ff00) << 8) | ((len & 0x000000ff) << 24)); if (swapped_len > 0xffff) { - VLOG_WARN("bad packet length %"PRIu32" or %"PRIu32" " + VLOG_WARN("bad packet length %zu or %"PRIu32" " "reading pcap file", len, swapped_len); return EPROTO; diff --git a/lib/poll-loop.c b/lib/poll-loop.c index aff2c335c..945b5c441 100644 --- a/lib/poll-loop.c +++ b/lib/poll-loop.c @@ -18,6 +18,7 @@ #include "poll-loop.h" #include #include +#include #include #include #include @@ -122,7 +123,7 @@ log_wakeup(const struct backtrace *backtrace, const char *format, ...) ds_put_char(&ds, ':'); for (i = 0; i < backtrace->n_frames; i++) { - ds_put_format(&ds, " 0x%x", backtrace->frames[i]); + ds_put_format(&ds, " 0x%"PRIxPTR, backtrace->frames[i]); } } VLOG_DBG("%s", ds_cstr(&ds)); diff --git a/lib/vconn.c b/lib/vconn.c index d1b8353b2..288a09581 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -1059,7 +1059,7 @@ check_ofp_message(const struct ofp_header *msg, uint8_t type, size_t size) if (got_size != size) { char *type_name = ofp_message_type_to_string(type); VLOG_WARN_RL(&bad_ofmsg_rl, - "received %s message of length %"PRIu16" (expected %zu)", + "received %s message of length %zu (expected %zu)", type_name, got_size, size); free(type_name); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); @@ -1094,7 +1094,7 @@ check_ofp_message_array(const struct ofp_header *msg, uint8_t type, got_size = ntohs(msg->length); if (got_size < min_size) { char *type_name = ofp_message_type_to_string(type); - VLOG_WARN_RL(&bad_ofmsg_rl, "received %s message of length %"PRIu16" " + VLOG_WARN_RL(&bad_ofmsg_rl, "received %s message of length %zu " "(expected at least %zu)", type_name, got_size, min_size); free(type_name); @@ -1103,7 +1103,7 @@ check_ofp_message_array(const struct ofp_header *msg, uint8_t type, if ((got_size - min_size) % array_elt_size) { char *type_name = ofp_message_type_to_string(type); VLOG_WARN_RL(&bad_ofmsg_rl, - "received %s message of bad length %"PRIu16": the " + "received %s message of bad length %zu: the " "excess over %zu (%zu) is not evenly divisible by %zu " "(remainder is %zu)", type_name, got_size, min_size, got_size - min_size, @@ -1136,13 +1136,13 @@ check_ofp_packet_out(const struct ofp_header *oh, struct ofpbuf *data, actions_len = ntohs(opo->actions_len); if (actions_len > extra) { - VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %zu bytes of actions " + VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %u bytes of actions " "but message has room for only %zu bytes", actions_len, extra); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); } if (actions_len % sizeof(union ofp_action)) { - VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %zu bytes of actions, " + VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out claims %u bytes of actions, " "which is not a multiple of %zu", actions_len, sizeof(union ofp_action)); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); @@ -1329,7 +1329,7 @@ validate_actions(const union ofp_action *actions, size_t n_actions, if (n_slots > slots_left) { VLOG_DBG_RL(&bad_ofmsg_rl, - "action requires %u slots but only %td remain", + "action requires %u slots but only %u remain", n_slots, slots_left); return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_LEN); } diff --git a/secchan/ofproto.c b/secchan/ofproto.c index cdb94b504..6319b0320 100644 --- a/secchan/ofproto.c +++ b/secchan/ofproto.c @@ -2945,7 +2945,7 @@ handle_ofmp(struct ofproto *p, struct ofconn *ofconn, { size_t msg_len = ntohs(ofmph->header.header.length); if (msg_len < sizeof(*ofmph)) { - VLOG_WARN_RL(&rl, "dropping short managment message: %d\n", msg_len); + VLOG_WARN_RL(&rl, "dropping short managment message: %zu\n", msg_len); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); } @@ -2953,7 +2953,7 @@ handle_ofmp(struct ofproto *p, struct ofconn *ofconn, struct ofmp_capability_request *ofmpcr; if (msg_len < sizeof(struct ofmp_capability_request)) { - VLOG_WARN_RL(&rl, "dropping short capability request: %d\n", + VLOG_WARN_RL(&rl, "dropping short capability request: %zu\n", msg_len); return ofp_mkerr(OFPET_BAD_REQUEST, OFPBRC_BAD_LENGTH); } diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c index 886cdb3ac..0dd93351d 100644 --- a/utilities/ovs-dpctl.c +++ b/utilities/ovs-dpctl.c @@ -408,9 +408,11 @@ show_dpif(struct dpif *dpif) printf("\tports: cur:%"PRIu32", max:%"PRIu32"\n", stats.n_ports, stats.max_ports); printf("\tgroups: max:%"PRIu16"\n", stats.max_groups); - printf("\tlookups: frags:%"PRIu64", hit:%"PRIu64", missed:%"PRIu64", " - "lost:%"PRIu64"\n", - stats.n_frags, stats.n_hit, stats.n_missed, stats.n_lost); + printf("\tlookups: frags:%llu, hit:%llu, missed:%llu, lost:%llu\n", + (unsigned long long int) stats.n_frags, + (unsigned long long int) stats.n_hit, + (unsigned long long int) stats.n_missed, + (unsigned long long int) stats.n_lost); printf("\tqueues: max-miss:%"PRIu16", max-action:%"PRIu16"\n", stats.max_miss_queue, stats.max_action_queue); } diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 761141597..7c20d7ad3 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -1135,7 +1135,7 @@ do_ping(const struct settings *s UNUSED, int argc, char *argv[]) printf("Reply:\n"); ofp_print(stdout, reply, reply->size, 2); } - printf("%d bytes from %s: xid=%08"PRIx32" time=%.1f ms\n", + printf("%zu bytes from %s: xid=%08"PRIx32" time=%.1f ms\n", reply->size - sizeof *rpy_hdr, argv[1], rpy_hdr->xid, (1000*(double)(end.tv_sec - start.tv_sec)) + (.001*(end.tv_usec - start.tv_usec))); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index ad9b46ef3..f4b2fb2c3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -2550,7 +2550,7 @@ bond_unixctl_show(struct unixctl_conn *conn, const char *args) continue; } - ds_put_format(&ds, "\thash %d: %lld kB load\n", + ds_put_format(&ds, "\thash %d: %"PRIu64" kB load\n", hash, be->tx_bytes / 1024); /* MACs. */ diff --git a/vswitchd/mgmt.c b/vswitchd/mgmt.c index d15b4ba46..8da640fed 100644 --- a/vswitchd/mgmt.c +++ b/vswitchd/mgmt.c @@ -700,7 +700,7 @@ recv_ofmp_extended_data(uint32_t xid, const struct ofmp_header *ofmph, * OpenFlow message. */ new_oh = ofpbuf_at(&ext_data_buffer, 0, 65536); if (!new_oh) { - VLOG_WARN_RL(&rl, "received short embedded message: %d\n", + VLOG_WARN_RL(&rl, "received short embedded message: %zu\n", ext_data_buffer.size); return -EINVAL; } From 8b2a2f4a793ec5ba735510c315a9d619439ac83c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 4 Nov 2009 13:48:41 -0800 Subject: [PATCH 20/23] bonding: Ignore updelay if there is no active slave. If all slaves on a bond are down but some are waiting for an updelay, enable the slave with the shortest amount of delay remaining. This would already occur if all other slaves were disabled at the time the delay was to begin but not if a delay was already in progress. This also immediately sends learning packets out in both situations, which prevents incoming packets to disabled slaves from being blackholed. CC: Danny Wannagat --- vswitchd/bridge.c | 74 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index f4b2fb2c3..d4d30372d 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -216,6 +216,7 @@ static void bond_run(struct bridge *); static void bond_wait(struct bridge *); static void bond_rebalance_port(struct port *); static void bond_send_learning_packets(struct port *); +static void bond_enable_slave(struct iface *iface, bool enable); static void port_create(struct bridge *, const char *name); static void port_reconfigure(struct port *); @@ -1416,13 +1417,31 @@ lookup_bond_entry(const struct port *port, const uint8_t mac[ETH_ADDR_LEN]) static int bond_choose_iface(const struct port *port) { - size_t i; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + size_t i, best_down_slave = -1; + long long next_delay_expiration = LLONG_MAX; + for (i = 0; i < port->n_ifaces; i++) { - if (port->ifaces[i]->enabled) { + struct iface *iface = port->ifaces[i]; + + if (iface->enabled) { return i; + } else if (iface->delay_expires < next_delay_expiration) { + best_down_slave = i; + next_delay_expiration = iface->delay_expires; } } - return -1; + + if (best_down_slave != -1) { + struct iface *iface = port->ifaces[best_down_slave]; + + VLOG_INFO_RL(&rl, "interface %s: skipping remaining %lli ms updelay " + "since no other interface is up", iface->name, + iface->delay_expires - time_msec()); + bond_enable_slave(iface, true); + } + + return best_down_slave; } static bool @@ -1472,10 +1491,12 @@ bond_link_status_update(struct iface *iface, bool carrier) iface->delay_expires = LLONG_MAX; VLOG_INFO_RL(&rl, "interface %s: will not be %s", iface->name, carrier ? "disabled" : "enabled"); - } else if (carrier && port->updelay && port->active_iface < 0) { - iface->delay_expires = time_msec(); - VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no " - "other interface is up", iface->name, port->updelay); + } else if (carrier && port->active_iface < 0) { + bond_enable_slave(iface, true); + if (port->updelay) { + VLOG_INFO_RL(&rl, "interface %s: skipping %d ms updelay since no " + "other interface is up", iface->name, port->updelay); + } } else { int delay = carrier ? port->updelay : port->downdelay; iface->delay_expires = time_msec() + delay; @@ -1512,6 +1533,12 @@ bond_enable_slave(struct iface *iface, bool enable) struct port *port = iface->port; struct bridge *br = port->bridge; + /* This acts as a recursion check. If the act of disabling a slave + * causes a different slave to be enabled, the flag will allow us to + * skip redundant work when we reenter this function. It must be + * cleared on exit to keep things safe with multiple bonds. */ + static bool moving_active_iface = false; + iface->delay_expires = LLONG_MAX; if (enable == iface->enabled) { return; @@ -1524,19 +1551,29 @@ bond_enable_slave(struct iface *iface, bool enable) if (iface->port_ifidx == port->active_iface) { ofproto_revalidate(br->ofproto, port->active_iface_tag); + + /* Disabling a slave can lead to another slave being immediately + * enabled if there will be no active slaves but one is waiting + * on an updelay. In this case we do not need to run most of the + * code for the newly enabled slave since there was no period + * without an active slave and it is redundant with the disabling + * path. */ + moving_active_iface = true; bond_choose_active_iface(port); } bond_send_learning_packets(port); } else { VLOG_WARN("interface %s: enabled", iface->name); - if (port->active_iface < 0) { + if (port->active_iface < 0 && !moving_active_iface) { ofproto_revalidate(br->ofproto, port->no_ifaces_tag); bond_choose_active_iface(port); bond_send_learning_packets(port); } iface->tag = tag_create_random(); } - port_update_bond_compat(port); + + moving_active_iface = false; + port->bond_compat_is_stale = true; } static void @@ -1547,20 +1584,19 @@ bond_run(struct bridge *br) for (i = 0; i < br->n_ports; i++) { struct port *port = br->ports[i]; + if (port->n_ifaces >= 2) { + for (j = 0; j < port->n_ifaces; j++) { + struct iface *iface = port->ifaces[j]; + if (time_msec() >= iface->delay_expires) { + bond_enable_slave(iface, !iface->enabled); + } + } + } + if (port->bond_compat_is_stale) { port->bond_compat_is_stale = false; port_update_bond_compat(port); } - - if (port->n_ifaces < 2) { - continue; - } - for (j = 0; j < port->n_ifaces; j++) { - struct iface *iface = port->ifaces[j]; - if (time_msec() >= iface->delay_expires) { - bond_enable_slave(iface, !iface->enabled); - } - } } } From 8ddb3f376d06374525cea4aa647462d8f520ed78 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Mon, 9 Nov 2009 16:06:52 -0800 Subject: [PATCH 21/23] vconn: Clean-up "match" typo in comments A few comments referenced "m", when "match" was clearly meant. This was likely due to a quick search and replace that scooped up these comments along with the intended code. This cleans that up. --- lib/vconn.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/vconn.c b/lib/vconn.c index 288a09581..a3ce214eb 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -1377,7 +1377,7 @@ normalize_match(struct ofp_match *m) if (wc & OFPFW_DL_TYPE) { m->dl_type = 0; - /* Can't sensibly m on network or transport headers if the + /* Can't sensibly match on network or transport headers if the * data link type is unknown. */ wc |= OFPFW_NW | OFPFW_TP; m->nw_src = m->nw_dst = m->nw_proto = 0; @@ -1386,7 +1386,7 @@ normalize_match(struct ofp_match *m) if (wc & OFPFW_NW_PROTO) { m->nw_proto = 0; - /* Can't sensibly m on transport headers if the network + /* Can't sensibly match on transport headers if the network * protocol is unknown. */ wc |= OFPFW_TP; m->tp_src = m->tp_dst = 0; @@ -1401,7 +1401,7 @@ normalize_match(struct ofp_match *m) } } else { /* Transport layer fields will always be extracted as zeros, so we - * can do an exact-m on those values. */ + * can do an exact-match on those values. */ wc &= ~OFPFW_TP; m->tp_src = m->tp_dst = 0; } @@ -1413,7 +1413,7 @@ normalize_match(struct ofp_match *m) } } else { /* Network and transport layer fields will always be extracted as - * zeros, so we can do an exact-m on those values. */ + * zeros, so we can do an exact-match on those values. */ wc &= ~(OFPFW_NW | OFPFW_TP); m->nw_proto = m->nw_src = m->nw_dst = 0; m->tp_src = m->tp_dst = 0; From 93dfc06772a722abf3362606307bab9a9bf33292 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 9 Nov 2009 15:26:51 -0800 Subject: [PATCH 22/23] bridge: Require learning table at all times. The bridge nominally allowed the MAC learning module to not be enabled though in reality it was always used. Tracking active MAC addresses in the bridge is useful for other reasons besides deciding the output port - primarily for bonding. In addition there were several bugs that would have been triggered had learning actually been disabled since that code path is never tested. This makes it explicit that the learning table should be maintained at all times. --- vswitchd/bridge.c | 91 ++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 53 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index d4d30372d..84b42d792 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -147,7 +147,7 @@ struct port { struct bridge { struct list node; /* Node in global list of bridges. */ char *name; /* User-specified arbitrary name. */ - struct mac_learning *ml; /* MAC learning table, or null not to learn. */ + struct mac_learning *ml; /* MAC learning table. */ bool sent_config_request; /* Successfully sent config request? */ uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */ @@ -846,9 +846,7 @@ bridge_wait(void) continue; } - if (br->ml) { - mac_learning_wait(br->ml); - } + mac_learning_wait(br->ml); bond_wait(br); brstp_wait(br); } @@ -861,9 +859,7 @@ bridge_flush(struct bridge *br) { COVERAGE_INC(bridge_flush); br->flush = true; - if (br->ml) { - mac_learning_flush(br->ml); - } + mac_learning_flush(br->ml); } /* Bridge unixctl user interface functions. */ @@ -872,6 +868,7 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args) { struct ds ds = DS_EMPTY_INITIALIZER; const struct bridge *br; + const struct mac_entry *e; br = bridge_lookup(args); if (!br) { @@ -880,16 +877,13 @@ bridge_unixctl_fdb_show(struct unixctl_conn *conn, const char *args) } ds_put_cstr(&ds, " port VLAN MAC Age\n"); - if (br->ml) { - const struct mac_entry *e; - LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { - if (e->port < 0 || e->port >= br->n_ports) { - continue; - } - ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", - br->ports[e->port]->ifaces[0]->dp_ifidx, - e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e)); + LIST_FOR_EACH (e, struct mac_entry, lru_node, &br->ml->lrus) { + if (e->port < 0 || e->port >= br->n_ports) { + continue; } + ds_put_format(&ds, "%5d %4d "ETH_ADDR_FMT" %3d\n", + br->ports[e->port]->ifaces[0]->dp_ifidx, + e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(e)); } unixctl_command_reply(conn, 200, ds_cstr(&ds)); ds_destroy(&ds); @@ -1031,9 +1025,7 @@ bridge_run_one(struct bridge *br) return error; } - if (br->ml) { - mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto)); - } + mac_learning_run(br->ml, ofproto_get_revalidate_set(br->ofproto)); bond_run(br); brstp_run(br); @@ -1859,6 +1851,7 @@ process_flow(struct bridge *br, const flow_t *flow, struct port *in_port; struct port *out_port = NULL; /* By default, drop the packet/flow. */ int vlan; + int out_port_idx; /* Find the interface and port structure for the received packet. */ in_iface = iface_from_dp_ifidx(br, flow->in_port); @@ -1969,37 +1962,33 @@ process_flow(struct bridge *br, const flow_t *flow, /* MAC learning. */ out_port = FLOOD_PORT; - if (br->ml) { - int out_port_idx; - - /* Learn source MAC (but don't try to learn from revalidation). */ - if (packet) { - tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, - vlan, in_port->port_idx); - if (rev_tag) { - /* The log messages here could actually be useful in debugging, - * so keep the rate limit relatively high. */ - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, - 300); - VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is " - "on port %s in VLAN %d", - br->name, ETH_ADDR_ARGS(flow->dl_src), - in_port->name, vlan); - ofproto_revalidate(br->ofproto, rev_tag); - } + /* Learn source MAC (but don't try to learn from revalidation). */ + if (packet) { + tag_type rev_tag = mac_learning_learn(br->ml, flow->dl_src, + vlan, in_port->port_idx); + if (rev_tag) { + /* The log messages here could actually be useful in debugging, + * so keep the rate limit relatively high. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, + 300); + VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is " + "on port %s in VLAN %d", + br->name, ETH_ADDR_ARGS(flow->dl_src), + in_port->name, vlan); + ofproto_revalidate(br->ofproto, rev_tag); } + } - /* Determine output port. */ - out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, - tags); - if (out_port_idx >= 0 && out_port_idx < br->n_ports) { - out_port = br->ports[out_port_idx]; - } else if (!packet) { - /* If we are revalidating but don't have a learning entry then - * eject the flow. Installing a flow that floods packets will - * prevent us from seeing future packets and learning properly. */ - return false; - } + /* Determine output port. */ + out_port_idx = mac_learning_lookup_tag(br->ml, flow->dl_dst, vlan, + tags); + if (out_port_idx >= 0 && out_port_idx < br->n_ports) { + out_port = br->ports[out_port_idx]; + } else if (!packet) { + /* If we are revalidating but don't have a learning entry then + * eject the flow. Installing a flow that floods packets will + * prevent us from seeing future packets and learning properly. */ + return false; } /* Don't send packets out their input ports. Don't forward frames that STP @@ -2435,7 +2424,7 @@ bond_send_learning_packets(struct port *port) struct ofpbuf packet; int error, n_packets, n_errors; - if (!port->n_ifaces || port->active_iface < 0 || !br->ml) { + if (!port->n_ifaces || port->active_iface < 0) { return; } @@ -2590,10 +2579,6 @@ bond_unixctl_show(struct unixctl_conn *conn, const char *args) hash, be->tx_bytes / 1024); /* MACs. */ - if (!port->bridge->ml) { - break; - } - LIST_FOR_EACH (me, struct mac_entry, lru_node, &port->bridge->ml->lrus) { uint16_t dp_ifidx; From f2d7fd66cf51b83acbe509f8ef6d4b34538d0646 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 6 Nov 2009 17:13:51 -0800 Subject: [PATCH 23/23] mirroring: Allow learning to be disabled on a VLAN. RSPAN does not work properly unless MAC learning for the VLAN is disabled on all switches between the origin and monitoring point. This allows learning to be disabled on a given VLAN so vSwitch can acts as an intermediate switch. Feature #2136 --- lib/mac-learning.c | 35 ++++++++++++++++++++++++++++++++- lib/mac-learning.h | 3 +++ vswitchd/bridge.c | 24 +++++++++++++++++++++- vswitchd/ovs-vswitchd.conf.5.in | 7 +++++-- 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/lib/mac-learning.c b/lib/mac-learning.c index c9b7d3e73..3f1db1461 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -21,6 +21,7 @@ #include #include +#include "bitmap.h" #include "coverage.h" #include "hash.h" #include "list.h" @@ -129,6 +130,7 @@ mac_learning_create(void) list_push_front(&ml->free, &s->lru_node); } ml->secret = random_uint32(); + ml->non_learning_vlans = NULL; return ml; } @@ -136,9 +138,36 @@ mac_learning_create(void) void mac_learning_destroy(struct mac_learning *ml) { + if (ml) { + bitmap_free(ml->non_learning_vlans); + } free(ml); } +/* Provides a bitmap of VLANs which have learning disabled. It takes + * ownership of the bitmap. Returns true if the set has changed from + * the previous value. */ +bool +mac_learning_set_disabled_vlans(struct mac_learning *ml, unsigned long *bitmap) +{ + bool ret = (bitmap == NULL + ? ml->non_learning_vlans != NULL + : (ml->non_learning_vlans == NULL + || !bitmap_equal(bitmap, ml->non_learning_vlans, 4096))); + + bitmap_free(ml->non_learning_vlans); + ml->non_learning_vlans = bitmap; + + return ret; +} + +static bool +is_learning_vlan(const struct mac_learning *ml, uint16_t vlan) +{ + return !(ml->non_learning_vlans + && bitmap_is_set(ml->non_learning_vlans, vlan)); +} + /* Attempts to make 'ml' learn from the fact that a frame from 'src_mac' was * just observed arriving from 'src_port' on the given 'vlan'. * @@ -156,6 +185,10 @@ mac_learning_learn(struct mac_learning *ml, struct mac_entry *e; struct list *bucket; + if (!is_learning_vlan(ml, vlan)) { + return 0; + } + if (eth_addr_is_multicast(src_mac)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 30); VLOG_DBG_RL(&rl, "multicast packet source "ETH_ADDR_FMT, @@ -216,7 +249,7 @@ mac_learning_lookup_tag(const struct mac_learning *ml, const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan, tag_type *tag) { - if (eth_addr_is_multicast(dst)) { + if (eth_addr_is_multicast(dst) || !is_learning_vlan(ml, vlan)) { return -1; } else { struct mac_entry *e = search_bucket(mac_table_bucket(ml, dst, vlan), diff --git a/lib/mac-learning.h b/lib/mac-learning.h index e2ee74ba4..ed843cd45 100644 --- a/lib/mac-learning.h +++ b/lib/mac-learning.h @@ -52,10 +52,13 @@ struct mac_learning { struct list table[MAC_HASH_SIZE]; /* Hash table. */ struct mac_entry entries[MAC_MAX]; /* All entries. */ uint32_t secret; /* Secret for */ + unsigned long *non_learning_vlans; /* Bitmap of learning disabled VLANs. */ }; struct mac_learning *mac_learning_create(void); void mac_learning_destroy(struct mac_learning *); +bool mac_learning_set_disabled_vlans(struct mac_learning *, + unsigned long *bitmap); tag_type mac_learning_learn(struct mac_learning *, const uint8_t src[ETH_ADDR_LEN], uint16_t vlan, uint16_t src_port); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 84b42d792..1e55ad2f3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -3265,7 +3265,8 @@ static void mirror_reconfigure(struct bridge *br) { struct svec old_mirrors, new_mirrors; - size_t i; + size_t i, n_rspan_vlans; + unsigned long *rspan_vlans; /* Collect old and new mirrors. */ svec_init(&old_mirrors); @@ -3314,6 +3315,27 @@ mirror_reconfigure(struct bridge *br) m->out_port->is_mirror_output_port = true; } } + + /* Update learning disabled vlans (for RSPAN). */ + rspan_vlans = NULL; + n_rspan_vlans = cfg_count("vlan.%s.disable-learning", br->name); + if (n_rspan_vlans) { + rspan_vlans = bitmap_allocate(4096); + + for (i = 0; i < n_rspan_vlans; i++) { + int vlan = cfg_get_vlan(i, "vlan.%s.disable-learning", br->name); + if (vlan >= 0) { + bitmap_set1(rspan_vlans, vlan); + } else { + VLOG_ERR("bridge %s: invalid value '%s' for learning disabled " + "VLAN", br->name, + cfg_get_string(i, "vlan.%s.disable-learning", br->name)); + } + } + } + if (mac_learning_set_disabled_vlans(br->ml, rspan_vlans)) { + bridge_flush(br); + } } static void diff --git a/vswitchd/ovs-vswitchd.conf.5.in b/vswitchd/ovs-vswitchd.conf.5.in index 7c3c87f3e..ef9d75979 100644 --- a/vswitchd/ovs-vswitchd.conf.5.in +++ b/vswitchd/ovs-vswitchd.conf.5.in @@ -347,12 +347,15 @@ on port 1, disrupting connectivity. If mirroring to a VLAN is desired in this scenario, then the physical switch must be replaced by one that learns Ethernet addresses on a per-VLAN basis. In addition, learning should be disabled on the VLAN containing mirrored traffic. -If this is not done then the intermediate switch will learn the MAC +If this is not done then intermediate switches will learn the MAC address of each end host from the mirrored traffic. If packets being sent to that end host are also mirrored, then they will be dropped since the switch will attempt to send them out the input port. Disabling learning for the VLAN will cause the switch to correctly -send the packet out all ports configured for that VLAN. +send the packet out all ports configured for that VLAN. If Open +vSwitch is being used as an intermediate switch learning can be disabled +by setting the key \fBvlan.\fIbrname\fB.learning-disable=\fIvid\fR +to the mirrored VLAN. .ST "Example" The following \fBovs\-vswitchd\fR configuration copies all frames received on \fBeth1\fR or \fBeth2\fR to \fBeth3\fR.