From b78f6b77bcaefc7b1e480aa6063091cb9ad50ad2 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 26 Apr 2011 09:42:18 -0700
Subject: [PATCH 01/40] Remove support for obsolete "tun_id_from_cookie"
 extension.

The "tun_id_from_cookie" OpenFlow extension predated NXM and supports only
a fraction of its features.  Nothing (at Nicira, anyway) uses it any
longer.  Support for it had been broken since January and it took until a
few days ago for anyone to complain, so it cannot be too important.  This
commit removes it.
---
 ChangeLog                     |   6 +
 include/openflow/nicira-ext.h |  23 +---
 lib/learning-switch.c         |   1 -
 lib/ofp-parse.c               |   2 +-
 lib/ofp-print.c               |  22 +---
 lib/ofp-util.c                | 233 +++++++---------------------------
 lib/ofp-util.h                |  24 +---
 ofproto/ofproto.c             |  34 +----
 tests/ofp-print.at            |  10 --
 tests/ofproto.at              |   4 -
 tests/ovs-ofctl.at            |  17 +--
 tests/test-flows.c            |   5 +-
 utilities/ovs-ofctl.8.in      |   5 -
 utilities/ovs-ofctl.c         |   9 +-
 14 files changed, 82 insertions(+), 313 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 76d8e2983..18f922808 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+post v1.1.0
+------------------------
+    - Feature removals:
+      - Dropped support for "tun_id_from_cookie" OpenFlow extension.
+	(Use the extensible match extensions instead.)
+
 v1.1.0 - 05 Apr 2011
 ------------------------
     - Ability to define policies over IPv6
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index 292a98738..241cfe99f 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -136,10 +136,7 @@ enum nicira_type {
     NXT_FLOW_END_CONFIG__OBSOLETE,
     NXT_FLOW_END__OBSOLETE,
     NXT_MGMT__OBSOLETE,
-
-    /* Use the high 32 bits of the cookie field as the tunnel ID in the flow
-     * match. */
-    NXT_TUN_ID_FROM_COOKIE,
+    NXT_TUN_ID_FROM_COOKIE__OBSOLETE,
 
     /* Controller role support.  The request body is struct nx_role_request.
      * The reply echos the request. */
@@ -170,16 +167,6 @@ enum nicira_stats_type {
     NXST_AGGREGATE              /* Analogous to OFPST_AGGREGATE. */
 };
 
-/* NXT_TUN_ID_FROM_COOKIE request. */
-struct nxt_tun_id_cookie {
-    struct ofp_header header;
-    ovs_be32 vendor;            /* NX_VENDOR_ID. */
-    ovs_be32 subtype;           /* NXT_TUN_ID_FROM_COOKIE */
-    uint8_t set;                /* Nonzero to enable, zero to disable. */
-    uint8_t pad[7];
-};
-OFP_ASSERT(sizeof(struct nxt_tun_id_cookie) == 24);
-
 /* Configures the "role" of the sending controller.  The default role is:
  *
  *    - Other (NX_ROLE_OTHER), which allows the controller access to all
@@ -604,12 +591,6 @@ enum nx_mp_algorithm {
      */
     NX_MP_ALG_ITER_HASH         /* Iterative Hash. */
 };
-
-/* Wildcard for tunnel ID. */
-#define NXFW_TUN_ID  (1 << 25)
-
-#define NXFW_ALL NXFW_TUN_ID
-#define OVSFW_ALL (OFPFW_ALL | NXFW_ALL)
 
 /* Action structure for NXAST_AUTOPATH.
  *
@@ -1165,8 +1146,6 @@ OFP_ASSERT(sizeof(struct nx_action_autopath) == 24);
 
 enum nx_flow_format {
     NXFF_OPENFLOW10 = 0,         /* Standard OpenFlow 1.0 compatible. */
-    NXFF_TUN_ID_FROM_COOKIE = 1, /* OpenFlow 1.0, plus obtain tunnel ID from
-                                  * cookie. */
     NXFF_NXM = 2                 /* Nicira extended match. */
 };
 
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index dc9af7739..e003b8f99 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -238,7 +238,6 @@ lswitch_process_packet(struct lswitch *sw, struct rconn *rconn,
     case OFPUTIL_OFPST_PORT_REPLY:
     case OFPUTIL_OFPST_TABLE_REPLY:
     case OFPUTIL_OFPST_AGGREGATE_REPLY:
-    case OFPUTIL_NXT_TUN_ID_FROM_COOKIE:
     case OFPUTIL_NXT_ROLE_REQUEST:
     case OFPUTIL_NXT_ROLE_REPLY:
     case OFPUTIL_NXT_SET_FLOW_FORMAT:
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 7e9a96531..f45c450c6 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -859,7 +859,7 @@ parse_ofp_flow_mod_str(struct list *packets, enum nx_flow_format *cur_format,
     parse_ofp_str(&fm, NULL, is_del ? NULL : &actions, string);
     fm.command = command;
 
-    min_format = ofputil_min_flow_format(&fm.cr, true, fm.cookie);
+    min_format = ofputil_min_flow_format(&fm.cr);
     next_format = MAX(*cur_format, min_format);
     if (next_format != *cur_format) {
         struct ofpbuf *sff = ofputil_make_set_flow_format(next_format);
diff --git a/lib/ofp-print.c b/lib/ofp-print.c
index 30f6d3742..65dea728b 100644
--- a/lib/ofp-print.c
+++ b/lib/ofp-print.c
@@ -780,9 +780,6 @@ ofp_match_to_string(const struct ofp_match *om, int verbosity)
             skip_type = false;
         }
     }
-    if (w & NXFW_TUN_ID) {
-        ds_put_cstr(&f, "tun_id_wild,");
-    }
     print_wild(&f, "in_port=", w & OFPFW_IN_PORT, verbosity,
                "%d", ntohs(om->in_port));
     print_wild(&f, "dl_vlan=", w & OFPFW_DL_VLAN, verbosity,
@@ -837,7 +834,7 @@ ofp_print_flow_mod(struct ds *s, const struct ofp_header *oh,
     bool need_priority;
     int error;
 
-    error = ofputil_decode_flow_mod(&fm, oh, NXFF_OPENFLOW10);
+    error = ofputil_decode_flow_mod(&fm, oh);
     if (error) {
         ofp_print_error(s, error);
         return;
@@ -934,7 +931,7 @@ ofp_print_flow_removed(struct ds *string, const struct ofp_header *oh)
     struct ofputil_flow_removed fr;
     int error;
 
-    error = ofputil_decode_flow_removed(&fr, oh, NXFF_OPENFLOW10);
+    error = ofputil_decode_flow_removed(&fr, oh);
     if (error) {
         ofp_print_error(string, error);
         return;
@@ -1070,7 +1067,7 @@ ofp_print_flow_stats_request(struct ds *string, const struct ofp_header *oh)
     struct flow_stats_request fsr;
     int error;
 
-    error = ofputil_decode_flow_stats_request(&fsr, oh, NXFF_OPENFLOW10);
+    error = ofputil_decode_flow_stats_request(&fsr, oh);
     if (error) {
         ofp_print_error(string, error);
         return;
@@ -1103,7 +1100,7 @@ ofp_print_flow_stats_reply(struct ds *string, const struct ofp_header *oh)
         struct ofputil_flow_stats fs;
         int retval;
 
-        retval = ofputil_decode_flow_stats_reply(&fs, &b, NXFF_OPENFLOW10);
+        retval = ofputil_decode_flow_stats_reply(&fs, &b);
         if (retval) {
             if (retval != EOF) {
                 ds_put_cstr(string, " ***parse error***");
@@ -1331,13 +1328,6 @@ ofp_print_echo(struct ds *string, const struct ofp_header *oh, int verbosity)
     }
 }
 
-static void
-ofp_print_nxt_tun_id_from_cookie(struct ds *string,
-                                 const struct nxt_tun_id_cookie *ntic)
-{
-    ds_put_format(string, " set=%"PRIu8, ntic->set);
-}
-
 static void
 ofp_print_nxt_role_message(struct ds *string,
                            const struct nx_role_request *nrr)
@@ -1507,10 +1497,6 @@ ofp_to_string__(const struct ofp_header *oh,
         ofp_print_ofpst_aggregate_reply(string, oh);
         break;
 
-    case OFPUTIL_NXT_TUN_ID_FROM_COOKIE:
-        ofp_print_nxt_tun_id_from_cookie(string, msg);
-        break;
-
     case OFPUTIL_NXT_ROLE_REQUEST:
     case OFPUTIL_NXT_ROLE_REPLY:
         ofp_print_nxt_role_message(string, msg);
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 9541d6973..5aa2b825e 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -102,25 +102,17 @@ enum {
 };
 
 /* Converts the ofp_match in 'match' into a cls_rule in 'rule', with the given
- * 'priority'.
- *
- * 'flow_format' must either NXFF_OPENFLOW10 or NXFF_TUN_ID_FROM_COOKIE.  In
- * the latter case only, 'flow''s tun_id field will be taken from the high bits
- * of 'cookie', if 'match''s wildcards do not indicate that tun_id is
- * wildcarded. */
+ * 'priority'. */
 void
 ofputil_cls_rule_from_match(const struct ofp_match *match,
-                            unsigned int priority,
-                            enum nx_flow_format flow_format,
-                            ovs_be64 cookie, struct cls_rule *rule)
+                            unsigned int priority, struct cls_rule *rule)
 {
     struct flow_wildcards *wc = &rule->wc;
     unsigned int ofpfw;
     ovs_be16 vid, pcp;
 
     /* Initialize rule->priority. */
-    ofpfw = ntohl(match->wildcards);
-    ofpfw &= flow_format == NXFF_TUN_ID_FROM_COOKIE ? OVSFW_ALL : OFPFW_ALL;
+    ofpfw = ntohl(match->wildcards) & OFPFW_ALL;
     rule->priority = !ofpfw ? UINT16_MAX : priority;
 
     /* Initialize most of rule->wc. */
@@ -136,10 +128,6 @@ ofputil_cls_rule_from_match(const struct ofp_match *match,
     wc->nw_src_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_SRC_SHIFT);
     wc->nw_dst_mask = ofputil_wcbits_to_netmask(ofpfw >> OFPFW_NW_DST_SHIFT);
 
-    if (flow_format == NXFF_TUN_ID_FROM_COOKIE && !(ofpfw & NXFW_TUN_ID)) {
-        cls_rule_set_tun_id(rule, htonll(ntohll(cookie) >> 32));
-    }
-
     if (ofpfw & OFPFW_DL_DST) {
         /* OpenFlow 1.0 OFPFW_DL_DST covers the whole Ethernet destination, but
          * Open vSwitch breaks the Ethernet destination into bits as FWW_DL_DST
@@ -207,21 +195,9 @@ ofputil_cls_rule_from_match(const struct ofp_match *match,
     cls_rule_zero_wildcarded_fields(rule);
 }
 
-/* Convert 'rule' into the OpenFlow match structure 'match'.  'flow_format'
- * must either NXFF_OPENFLOW10 or NXFF_TUN_ID_FROM_COOKIE.
- *
- * The NXFF_TUN_ID_FROM_COOKIE flow format requires modifying the flow cookie.
- * This function can help with that, if 'cookie_out' is nonnull.  For
- * NXFF_OPENFLOW10, or if the tunnel ID is wildcarded, 'cookie_in' will be
- * copied directly to '*cookie_out'.  For NXFF_TUN_ID_FROM_COOKIE when tunnel
- * ID is matched, 'cookie_in' will be modified appropriately before setting
- * '*cookie_out'.
- */
+/* Convert 'rule' into the OpenFlow match structure 'match'. */
 void
-ofputil_cls_rule_to_match(const struct cls_rule *rule,
-                          enum nx_flow_format flow_format,
-                          struct ofp_match *match,
-                          ovs_be64 cookie_in, ovs_be64 *cookie_out)
+ofputil_cls_rule_to_match(const struct cls_rule *rule, struct ofp_match *match)
 {
     const struct flow_wildcards *wc = &rule->wc;
     unsigned int ofpfw;
@@ -234,20 +210,6 @@ ofputil_cls_rule_to_match(const struct cls_rule *rule,
         ofpfw |= OFPFW_NW_TOS;
     }
 
-    /* Tunnel ID. */
-    if (flow_format == NXFF_TUN_ID_FROM_COOKIE) {
-        if (wc->tun_id_mask == htonll(0)) {
-            ofpfw |= NXFW_TUN_ID;
-        } else {
-            uint32_t cookie_lo = ntohll(cookie_in);
-            uint32_t cookie_hi = ntohll(rule->flow.tun_id);
-            cookie_in = htonll(cookie_lo | ((uint64_t) cookie_hi << 32));
-        }
-    }
-    if (cookie_out) {
-        *cookie_out = cookie_in;
-    }
-
     /* Translate VLANs. */
     match->dl_vlan = htons(0);
     match->dl_vlan_pcp = 0;
@@ -400,10 +362,6 @@ ofputil_decode_vendor(const struct ofp_header *oh,
                       const struct ofputil_msg_type **typep)
 {
     static const struct ofputil_msg_type nxt_messages[] = {
-        { OFPUTIL_NXT_TUN_ID_FROM_COOKIE,
-          NXT_TUN_ID_FROM_COOKIE, "NXT_TUN_ID_FROM_COOKIE",
-          sizeof(struct nxt_tun_id_cookie), 0 },
-
         { OFPUTIL_NXT_ROLE_REQUEST,
           NXT_ROLE_REQUEST, "NXT_ROLE_REQUEST",
           sizeof(struct nx_role_request), 0 },
@@ -800,7 +758,6 @@ ofputil_flow_format_is_valid(enum nx_flow_format flow_format)
 {
     switch (flow_format) {
     case NXFF_OPENFLOW10:
-    case NXFF_TUN_ID_FROM_COOKIE:
     case NXFF_NXM:
         return true;
     }
@@ -814,8 +771,6 @@ ofputil_flow_format_to_string(enum nx_flow_format flow_format)
     switch (flow_format) {
     case NXFF_OPENFLOW10:
         return "openflow10";
-    case NXFF_TUN_ID_FROM_COOKIE:
-        return "tun_id_from_cookie";
     case NXFF_NXM:
         return "nxm";
     default:
@@ -827,7 +782,6 @@ int
 ofputil_flow_format_from_string(const char *s)
 {
     return (!strcmp(s, "openflow10") ? NXFF_OPENFLOW10
-            : !strcmp(s, "tun_id_from_cookie") ? NXFF_TUN_ID_FROM_COOKIE
             : !strcmp(s, "nxm") ? NXFF_NXM
             : -1);
 }
@@ -845,100 +799,43 @@ regs_fully_wildcarded(const struct flow_wildcards *wc)
     return true;
 }
 
-static inline bool
-is_nxm_required(const struct cls_rule *rule, bool cookie_support,
-                ovs_be64 cookie)
+/* Returns the minimum nx_flow_format to use for sending 'rule' to a switch
+ * (e.g. to add or remove a flow).  Only NXM can handle tunnel IDs, registers,
+ * or fixing the Ethernet multicast bit.  Otherwise, it's better to use
+ * NXFF_OPENFLOW10 for backward compatibility. */
+enum nx_flow_format
+ofputil_min_flow_format(const struct cls_rule *rule)
 {
     const struct flow_wildcards *wc = &rule->wc;
-    uint32_t cookie_hi;
-    uint64_t tun_id;
 
     /* Only NXM supports separately wildcards the Ethernet multicast bit. */
     if (!(wc->wildcards & FWW_DL_DST) != !(wc->wildcards & FWW_ETH_MCAST)) {
-        return true;
+        return NXFF_NXM;
     }
 
     /* Only NXM supports matching ARP hardware addresses. */
     if (!(wc->wildcards & FWW_ARP_SHA) || !(wc->wildcards & FWW_ARP_THA)) {
-        return true;
+        return NXFF_NXM;
     }
 
     /* Only NXM supports matching IPv6 traffic. */
     if (!(wc->wildcards & FWW_DL_TYPE)
             && (rule->flow.dl_type == htons(ETH_TYPE_IPV6))) {
-        return true;
+        return NXFF_NXM;
     }
 
     /* Only NXM supports matching registers. */
     if (!regs_fully_wildcarded(wc)) {
-        return true;
+        return NXFF_NXM;
     }
 
-    switch (wc->tun_id_mask) {
-    case CONSTANT_HTONLL(0):
-        /* Other formats can fully wildcard tun_id. */
-        break;
-
-    case CONSTANT_HTONLL(UINT64_MAX):
-        /* Only NXM supports tunnel ID matching without a cookie. */
-        if (!cookie_support) {
-            return true;
-        }
-
-        /* Only NXM supports 64-bit tunnel IDs. */
-        tun_id = ntohll(rule->flow.tun_id);
-        if (tun_id > UINT32_MAX) {
-            return true;
-        }
-
-        /* Only NXM supports a cookie whose top 32 bits conflict with the
-         * tunnel ID. */
-        cookie_hi = ntohll(cookie) >> 32;
-        if (cookie_hi && cookie_hi != tun_id) {
-            return true;
-        }
-        break;
-
-    default:
-        /* Only NXM supports partial matches on tunnel ID. */
-        return true;
+    /* Only NXM supports matching tun_id. */
+    if (wc->tun_id_mask != htonll(0)) {
+        return NXFF_NXM;
     }
 
     /* Other formats can express this rule. */
-    return false;
-}
-
-/* Returns the minimum nx_flow_format to use for sending 'rule' to a switch
- * (e.g. to add or remove a flow).  'cookie_support' should be true if the
- * command to be sent includes a flow cookie (as OFPT_FLOW_MOD does, for
- * example) or false if the command does not (OFPST_FLOW and OFPST_AGGREGATE do
- * not, for example).  If 'cookie_support' is true, then 'cookie' should be the
- * cookie to be sent; otherwise its value is ignored.
- *
- * The "best" flow format is chosen on this basis:
- *
- *   - It must be capable of expressing the rule.  NXFF_OPENFLOW10 flows can't
- *     handle tunnel IDs.  NXFF_TUN_ID_FROM_COOKIE flows can't handle registers
- *     or fixing the Ethernet multicast bit, and can't handle tunnel IDs that
- *     conflict with the high 32 bits of the cookie or commands that don't
- *     support cookies.
- *
- *   - Otherwise, the chosen format should be as backward compatible as
- *     possible.  (NXFF_OPENFLOW10 is more backward compatible than
- *     NXFF_TUN_ID_FROM_COOKIE, which is more backward compatible than
- *     NXFF_NXM.)
- */
-enum nx_flow_format
-ofputil_min_flow_format(const struct cls_rule *rule, bool cookie_support,
-                        ovs_be64 cookie)
-{
-    if (is_nxm_required(rule, cookie_support, cookie)) {
-        return NXFF_NXM;
-    } else if (rule->wc.tun_id_mask != htonll(0)) {
-        return NXFF_TUN_ID_FROM_COOKIE;
-    } else {
-        return NXFF_OPENFLOW10;
-    }
+    return NXFF_OPENFLOW10;
 }
 
 /* Returns an OpenFlow message that can be used to set the flow format to
@@ -946,20 +843,11 @@ ofputil_min_flow_format(const struct cls_rule *rule, bool cookie_support,
 struct ofpbuf *
 ofputil_make_set_flow_format(enum nx_flow_format flow_format)
 {
+    struct nxt_set_flow_format *sff;
     struct ofpbuf *msg;
 
-    if (flow_format == NXFF_OPENFLOW10
-        || flow_format == NXFF_TUN_ID_FROM_COOKIE) {
-        struct nxt_tun_id_cookie *tic;
-
-        tic = make_nxmsg(sizeof *tic, NXT_TUN_ID_FROM_COOKIE, &msg);
-        tic->set = flow_format == NXFF_TUN_ID_FROM_COOKIE;
-    } else {
-        struct nxt_set_flow_format *sff;
-
-        sff = make_nxmsg(sizeof *sff, NXT_SET_FLOW_FORMAT, &msg);
-        sff->format = htonl(flow_format);
-    }
+    sff = make_nxmsg(sizeof *sff, NXT_SET_FLOW_FORMAT, &msg);
+    sff->format = htonl(flow_format);
 
     return msg;
 }
@@ -968,14 +856,9 @@ ofputil_make_set_flow_format(enum nx_flow_format flow_format)
  * flow_mod in 'fm'.  Returns 0 if successful, otherwise an OpenFlow error
  * code.
  *
- * For OFPT_FLOW_MOD messages, 'flow_format' should be the current flow format
- * at the time when the message was received.  Otherwise 'flow_format' is
- * ignored.
- *
  * Does not validate the flow_mod actions. */
 int
-ofputil_decode_flow_mod(struct flow_mod *fm, const struct ofp_header *oh,
-                        enum nx_flow_format flow_format)
+ofputil_decode_flow_mod(struct flow_mod *fm, const struct ofp_header *oh)
 {
     const struct ofputil_msg_type *type;
     struct ofpbuf b;
@@ -1015,8 +898,7 @@ ofputil_decode_flow_mod(struct flow_mod *fm, const struct ofp_header *oh,
         }
 
         /* Translate the message. */
-        ofputil_cls_rule_from_match(&match, ntohs(ofm->priority), flow_format,
-                                    ofm->cookie, &fm->cr);
+        ofputil_cls_rule_from_match(&match, ntohs(ofm->priority), &fm->cr);
         fm->cookie = ofm->cookie;
         fm->command = ntohs(ofm->command);
         fm->idle_timeout = ntohs(ofm->idle_timeout);
@@ -1065,14 +947,13 @@ ofputil_encode_flow_mod(const struct flow_mod *fm,
     size_t actions_len = fm->n_actions * sizeof *fm->actions;
     struct ofpbuf *msg;
 
-    if (flow_format == NXFF_OPENFLOW10
-        || flow_format == NXFF_TUN_ID_FROM_COOKIE) {
+    if (flow_format == NXFF_OPENFLOW10) {
         struct ofp_flow_mod *ofm;
 
         msg = ofpbuf_new(sizeof *ofm + actions_len);
         ofm = put_openflow(sizeof *ofm, OFPT_FLOW_MOD, msg);
-        ofputil_cls_rule_to_match(&fm->cr, flow_format, &ofm->match,
-                                  fm->cookie, &ofm->cookie);
+        ofputil_cls_rule_to_match(&fm->cr, &ofm->match);
+        ofm->cookie = fm->cookie;
         ofm->command = htons(fm->command);
         ofm->idle_timeout = htons(fm->idle_timeout);
         ofm->hard_timeout = htons(fm->hard_timeout);
@@ -1110,13 +991,12 @@ ofputil_encode_flow_mod(const struct flow_mod *fm,
 static int
 ofputil_decode_ofpst_flow_request(struct flow_stats_request *fsr,
                                   const struct ofp_header *oh,
-                                  enum nx_flow_format flow_format,
                                   bool aggregate)
 {
     const struct ofp_flow_stats_request *ofsr = ofputil_stats_body(oh);
 
     fsr->aggregate = aggregate;
-    ofputil_cls_rule_from_match(&ofsr->match, 0, flow_format, 0, &fsr->match);
+    ofputil_cls_rule_from_match(&ofsr->match, 0, &fsr->match);
     fsr->out_port = ntohs(ofsr->out_port);
     fsr->table_id = ofsr->table_id;
 
@@ -1151,17 +1031,11 @@ ofputil_decode_nxst_flow_request(struct flow_stats_request *fsr,
 }
 
 /* Converts an OFPST_FLOW, OFPST_AGGREGATE, NXST_FLOW, or NXST_AGGREGATE
- * request 'oh', received when the current flow format was 'flow_format', into
- * an abstract flow_stats_request in 'fsr'.  Returns 0 if successful, otherwise
- * an OpenFlow error code.
- *
- * For OFPST_FLOW and OFPST_AGGREGATE messages, 'flow_format' should be the
- * current flow format at the time when the message was received.  Otherwise
- * 'flow_format' is ignored. */
+ * request 'oh', into an abstract flow_stats_request in 'fsr'.  Returns 0 if
+ * successful, otherwise an OpenFlow error code. */
 int
 ofputil_decode_flow_stats_request(struct flow_stats_request *fsr,
-                                  const struct ofp_header *oh,
-                                  enum nx_flow_format flow_format)
+                                  const struct ofp_header *oh)
 {
     const struct ofputil_msg_type *type;
     struct ofpbuf b;
@@ -1173,10 +1047,10 @@ ofputil_decode_flow_stats_request(struct flow_stats_request *fsr,
     code = ofputil_msg_type_code(type);
     switch (code) {
     case OFPUTIL_OFPST_FLOW_REQUEST:
-        return ofputil_decode_ofpst_flow_request(fsr, oh, flow_format, false);
+        return ofputil_decode_ofpst_flow_request(fsr, oh, false);
 
     case OFPUTIL_OFPST_AGGREGATE_REQUEST:
-        return ofputil_decode_ofpst_flow_request(fsr, oh, flow_format, true);
+        return ofputil_decode_ofpst_flow_request(fsr, oh, true);
 
     case OFPUTIL_NXST_FLOW_REQUEST:
         return ofputil_decode_nxst_flow_request(fsr, oh, false);
@@ -1199,8 +1073,7 @@ ofputil_encode_flow_stats_request(const struct flow_stats_request *fsr,
 {
     struct ofpbuf *msg;
 
-    if (flow_format == NXFF_OPENFLOW10
-        || flow_format == NXFF_TUN_ID_FROM_COOKIE) {
+    if (flow_format == NXFF_OPENFLOW10) {
         struct ofp_flow_stats_request *ofsr;
         int type;
 
@@ -1209,8 +1082,7 @@ ofputil_encode_flow_stats_request(const struct flow_stats_request *fsr,
 
         type = fsr->aggregate ? OFPST_AGGREGATE : OFPST_FLOW;
         ofsr = ofputil_make_stats_request(sizeof *ofsr, type, &msg);
-        ofputil_cls_rule_to_match(&fsr->match, flow_format, &ofsr->match,
-                                  0, NULL);
+        ofputil_cls_rule_to_match(&fsr->match, &ofsr->match);
         ofsr->table_id = fsr->table_id;
         ofsr->out_port = htons(fsr->out_port);
     } else if (flow_format == NXFF_NXM) {
@@ -1234,9 +1106,7 @@ ofputil_encode_flow_stats_request(const struct flow_stats_request *fsr,
 }
 
 /* Converts an OFPST_FLOW or NXST_FLOW reply in 'msg' into an abstract
- * ofputil_flow_stats in 'fs'.  For OFPST_FLOW messages, 'flow_format' should
- * be the current flow format at the time when the request corresponding to the
- * reply in 'msg' was sent.  Otherwise 'flow_format' is ignored.
+ * ofputil_flow_stats in 'fs'.
  *
  * Multiple OFPST_FLOW or NXST_FLOW replies can be packed into a single
  * OpenFlow message.  Calling this function multiple times for a single 'msg'
@@ -1247,8 +1117,7 @@ ofputil_encode_flow_stats_request(const struct flow_stats_request *fsr,
  * otherwise a positive errno value. */
 int
 ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *fs,
-                                struct ofpbuf *msg,
-                                enum nx_flow_format flow_format)
+                                struct ofpbuf *msg)
 {
     const struct ofputil_msg_type *type;
     int code;
@@ -1293,7 +1162,7 @@ ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *fs,
 
         fs->cookie = get_32aligned_be64(&ofs->cookie);
         ofputil_cls_rule_from_match(&ofs->match, ntohs(ofs->priority),
-                                    flow_format, fs->cookie, &fs->rule);
+                                    &fs->rule);
         fs->table_id = ofs->table_id;
         fs->duration_sec = ntohl(ofs->duration_sec);
         fs->duration_nsec = ntohl(ofs->duration_nsec);
@@ -1344,18 +1213,12 @@ ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *fs,
     return 0;
 }
 
-/* Converts an OFPT_FLOW_REMOVED or NXT_FLOW_REMOVED message 'oh', received
- * when the current flow format was 'flow_format', into an abstract
- * ofputil_flow_removed in 'fr'.  Returns 0 if successful, otherwise an
- * OpenFlow error code.
- *
- * For OFPT_FLOW_REMOVED messages, 'flow_format' should be the current flow
- * format at the time when the message was received.  Otherwise 'flow_format'
- * is ignored. */
+/* Converts an OFPT_FLOW_REMOVED or NXT_FLOW_REMOVED message 'oh' into an
+ * abstract ofputil_flow_removed in 'fr'.  Returns 0 if successful, otherwise
+ * an OpenFlow error code. */
 int
 ofputil_decode_flow_removed(struct ofputil_flow_removed *fr,
-                            const struct ofp_header *oh,
-                            enum nx_flow_format flow_format)
+                            const struct ofp_header *oh)
 {
     const struct ofputil_msg_type *type;
     enum ofputil_msg_code code;
@@ -1367,7 +1230,7 @@ ofputil_decode_flow_removed(struct ofputil_flow_removed *fr,
 
         ofr = (const struct ofp_flow_removed *) oh;
         ofputil_cls_rule_from_match(&ofr->match, ntohs(ofr->priority),
-                                    flow_format, ofr->cookie, &fr->rule);
+                                    &fr->rule);
         fr->cookie = ofr->cookie;
         fr->reason = ofr->reason;
         fr->duration_sec = ntohl(ofr->duration_sec);
@@ -1415,14 +1278,12 @@ ofputil_encode_flow_removed(const struct ofputil_flow_removed *fr,
 {
     struct ofpbuf *msg;
 
-    if (flow_format == NXFF_OPENFLOW10
-        || flow_format == NXFF_TUN_ID_FROM_COOKIE) {
+    if (flow_format == NXFF_OPENFLOW10) {
         struct ofp_flow_removed *ofr;
 
         ofr = make_openflow_xid(sizeof *ofr, OFPT_FLOW_REMOVED, htonl(0),
                                 &msg);
-        ofputil_cls_rule_to_match(&fr->rule, flow_format, &ofr->match,
-                                  fr->cookie, &ofr->cookie);
+        ofputil_cls_rule_to_match(&fr->rule, &ofr->match);
         ofr->priority = htons(fr->rule.priority);
         ofr->reason = fr->reason;
         ofr->duration_sec = htonl(fr->duration_sec);
@@ -1717,7 +1578,7 @@ make_flow_mod(uint16_t command, const struct cls_rule *rule,
     ofm->header.length = htons(size);
     ofm->cookie = 0;
     ofm->priority = htons(MIN(rule->priority, UINT16_MAX));
-    ofputil_cls_rule_to_match(rule, NXFF_OPENFLOW10, &ofm->match, 0, NULL);
+    ofputil_cls_rule_to_match(rule, &ofm->match);
     ofm->command = htons(command);
     return out;
 }
@@ -2175,7 +2036,7 @@ normalize_match(struct ofp_match *m)
     enum { OFPFW_TP = OFPFW_TP_SRC | OFPFW_TP_DST };
     uint32_t wc;
 
-    wc = ntohl(m->wildcards) & OVSFW_ALL;
+    wc = ntohl(m->wildcards) & OFPFW_ALL;
     if (wc & OFPFW_DL_TYPE) {
         m->dl_type = 0;
 
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index fdeb9d9e3..6e69bae05 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -71,7 +71,6 @@ enum ofputil_msg_code {
     OFPUTIL_OFPST_AGGREGATE_REPLY,
 
     /* NXT_* messages. */
-    OFPUTIL_NXT_TUN_ID_FROM_COOKIE,
     OFPUTIL_NXT_ROLE_REQUEST,
     OFPUTIL_NXT_ROLE_REPLY,
     OFPUTIL_NXT_SET_FLOW_FORMAT,
@@ -100,11 +99,8 @@ int ofputil_netmask_to_wcbits(ovs_be32 netmask);
 
 /* Work with OpenFlow 1.0 ofp_match. */
 void ofputil_cls_rule_from_match(const struct ofp_match *,
-                                 unsigned int priority, enum nx_flow_format,
-                                 ovs_be64 cookie, struct cls_rule *);
-void ofputil_cls_rule_to_match(const struct cls_rule *, enum nx_flow_format,
-                               struct ofp_match *,
-                               ovs_be64 cookie_in, ovs_be64 *cookie_out);
+                                 unsigned int priority, struct cls_rule *);
+void ofputil_cls_rule_to_match(const struct cls_rule *, struct ofp_match *);
 void normalize_match(struct ofp_match *);
 char *ofp_match_to_literal_string(const struct ofp_match *match);
 
@@ -116,9 +112,7 @@ ovs_be16 ofputil_dl_type_from_openflow(ovs_be16 ofp_dl_type);
 bool ofputil_flow_format_is_valid(enum nx_flow_format);
 const char *ofputil_flow_format_to_string(enum nx_flow_format);
 int ofputil_flow_format_from_string(const char *);
-enum nx_flow_format ofputil_min_flow_format(const struct cls_rule *,
-                                            bool cookie_support,
-                                            ovs_be64 cookie);
+enum nx_flow_format ofputil_min_flow_format(const struct cls_rule *);
 
 struct ofpbuf *ofputil_make_set_flow_format(enum nx_flow_format);
 
@@ -136,8 +130,7 @@ struct flow_mod {
     size_t n_actions;
 };
 
-int ofputil_decode_flow_mod(struct flow_mod *, const struct ofp_header *,
-                            enum nx_flow_format);
+int ofputil_decode_flow_mod(struct flow_mod *, const struct ofp_header *);
 struct ofpbuf *ofputil_encode_flow_mod(const struct flow_mod *,
                                        enum nx_flow_format);
 
@@ -150,8 +143,7 @@ struct flow_stats_request {
 };
 
 int ofputil_decode_flow_stats_request(struct flow_stats_request *,
-                                      const struct ofp_header *,
-                                      enum nx_flow_format);
+                                      const struct ofp_header *);
 struct ofpbuf *ofputil_encode_flow_stats_request(
     const struct flow_stats_request *, enum nx_flow_format);
 
@@ -171,8 +163,7 @@ struct ofputil_flow_stats {
 };
 
 int ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *,
-                                    struct ofpbuf *msg,
-                                    enum nx_flow_format);
+                                    struct ofpbuf *msg);
 
 /* Flow removed message, independent of flow format. */
 struct ofputil_flow_removed {
@@ -187,8 +178,7 @@ struct ofputil_flow_removed {
 };
 
 int ofputil_decode_flow_removed(struct ofputil_flow_removed *,
-                                const struct ofp_header *,
-                                enum nx_flow_format);
+                                const struct ofp_header *);
 struct ofpbuf *ofputil_encode_flow_removed(const struct ofputil_flow_removed *,
                                            enum nx_flow_format);
 
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index fb63606ad..7eef93a17 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2746,8 +2746,7 @@ handle_table_stats_request(struct ofconn *ofconn,
     ots = append_ofp_stats_reply(sizeof *ots, ofconn, &msg);
     memset(ots, 0, sizeof *ots);
     strcpy(ots->name, "classifier");
-    ots->wildcards = (ofconn_get_flow_format(ofconn) == NXFF_OPENFLOW10
-                      ? htonl(OFPFW_ALL) : htonl(OVSFW_ALL));
+    ots->wildcards = htonl(OFPFW_ALL);
     ots->max_entries = htonl(1024 * 1024); /* An arbitrary big number. */
     ots->active_count = htonl(classifier_count(&p->cls));
     put_32aligned_be64(&ots->lookup_count, htonll(0));  /* XXX */
@@ -2835,7 +2834,6 @@ put_ofp_flow_stats(struct ofconn *ofconn, struct rule *rule,
 {
     struct ofp_flow_stats *ofs;
     uint64_t packet_count, byte_count;
-    ovs_be64 cookie;
     size_t act_len, len;
 
     if (rule_is_hidden(rule) || !rule_has_out_port(rule, out_port)) {
@@ -2851,9 +2849,8 @@ put_ofp_flow_stats(struct ofconn *ofconn, struct rule *rule,
     ofs->length = htons(len);
     ofs->table_id = 0;
     ofs->pad = 0;
-    ofputil_cls_rule_to_match(&rule->cr, ofconn_get_flow_format(ofconn),
-                              &ofs->match, rule->flow_cookie, &cookie);
-    put_32aligned_be64(&ofs->cookie, cookie);
+    ofputil_cls_rule_to_match(&rule->cr, &ofs->match);
+    put_32aligned_be64(&ofs->cookie, rule->flow_cookie);
     calc_flow_duration(rule->created, &ofs->duration_sec, &ofs->duration_nsec);
     ofs->priority = htons(rule->cr.priority);
     ofs->idle_timeout = htons(rule->idle_timeout);
@@ -2895,8 +2892,7 @@ handle_flow_stats_request(struct ofconn *ofconn, const struct ofp_header *oh)
         struct cls_rule target;
         struct rule *rule;
 
-        ofputil_cls_rule_from_match(&fsr->match, 0, NXFF_OPENFLOW10, 0,
-                                    &target);
+        ofputil_cls_rule_from_match(&fsr->match, 0, &target);
         cls_cursor_init(&cursor, &ofproto->cls, &target);
         CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
             put_ofp_flow_stats(ofconn, rule, fsr->out_port, &reply);
@@ -3068,8 +3064,7 @@ handle_aggregate_stats_request(struct ofconn *ofconn,
     struct cls_rule target;
     struct ofpbuf *msg;
 
-    ofputil_cls_rule_from_match(&request->match, 0, NXFF_OPENFLOW10, 0,
-                                &target);
+    ofputil_cls_rule_from_match(&request->match, 0, &target);
 
     msg = start_ofp_stats_reply(oh, sizeof *reply);
     reply = append_ofp_stats_reply(sizeof *reply, ofconn, &msg);
@@ -3516,7 +3511,7 @@ handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh)
         return error;
     }
 
-    error = ofputil_decode_flow_mod(&fm, oh, ofconn_get_flow_format(ofconn));
+    error = ofputil_decode_flow_mod(&fm, oh);
     if (error) {
         return error;
     }
@@ -3558,19 +3553,6 @@ handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh)
     }
 }
 
-static int
-handle_tun_id_from_cookie(struct ofconn *ofconn, const struct ofp_header *oh)
-{
-    const struct nxt_tun_id_cookie *msg
-        = (const struct nxt_tun_id_cookie *) oh;
-    enum nx_flow_format flow_format;
-
-    flow_format = msg->set ? NXFF_TUN_ID_FROM_COOKIE : NXFF_OPENFLOW10;
-    ofconn_set_flow_format(ofconn, flow_format);
-
-    return 0;
-}
-
 static int
 handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh)
 {
@@ -3611,7 +3593,6 @@ handle_nxt_set_flow_format(struct ofconn *ofconn, const struct ofp_header *oh)
 
     format = ntohl(msg->format);
     if (format == NXFF_OPENFLOW10
-        || format == NXFF_TUN_ID_FROM_COOKIE
         || format == NXFF_NXM) {
         ofconn_set_flow_format(ofconn, format);
         return 0;
@@ -3676,9 +3657,6 @@ handle_openflow__(struct ofconn *ofconn, const struct ofpbuf *msg)
         return 0;
 
         /* Nicira extension requests. */
-    case OFPUTIL_NXT_TUN_ID_FROM_COOKIE:
-        return handle_tun_id_from_cookie(ofconn, oh);
-
     case OFPUTIL_NXT_ROLE_REQUEST:
         return handle_role_request(ofconn, oh);
 
diff --git a/tests/ofp-print.at b/tests/ofp-print.at
index 6e1e3bd4e..923e00e9c 100644
--- a/tests/ofp-print.at
+++ b/tests/ofp-print.at
@@ -601,16 +601,6 @@ OFPT_BARRIER_REPLY (xid=0x1):
 ])
 AT_CLEANUP
 
-AT_SETUP([NXT_TUN_ID_FROM_COOKIE])
-AT_KEYWORDS([ofp-print])
-AT_CHECK([ovs-ofctl ofp-print "\
-01 04 00 18 00 00 00 02 00 00 23 20 00 00 00 09 \
-01 00 00 00 00 00 00 00 \
-"], [0], [dnl
-NXT_TUN_ID_FROM_COOKIE (xid=0x2): set=1
-])
-AT_CLEANUP
-
 AT_SETUP([NXT_ROLE_REQUEST])
 AT_KEYWORDS([ofp-print])
 AT_CHECK([ovs-ofctl ofp-print "\
diff --git a/tests/ofproto.at b/tests/ofproto.at
index fc7ff57ef..10080e671 100644
--- a/tests/ofproto.at
+++ b/tests/ofproto.at
@@ -48,13 +48,9 @@ AT_CHECK([ovs-ofctl dump-flows br0 | STRIP_XIDS], [0], [NXST_FLOW reply:
 ])
 AT_CHECK([echo 'in_port=1,actions=0' | ovs-ofctl add-flows br0 -])
 AT_CHECK([ovs-ofctl add-flow br0 in_port=0,actions=1])
-dnl Tests for a bug in which ofproto ignored tun_id in tun_id_from_cookie
-dnl flow_mod commands.
-AT_CHECK([ovs-ofctl add-flow -F tun_id_from_cookie br0 tun_id=1,actions=mod_vlan_vid:4])
 AT_CHECK([ovs-ofctl dump-flows br0 | STRIP_XIDS | STRIP_DURATION | sort], [0], [dnl
  cookie=0x0, duration=?s, table_id=0, n_packets=0, n_bytes=0, in_port=1 actions=output:0
  cookie=0x0, duration=?s, table_id=0, n_packets=0, n_bytes=0, in_port=65534 actions=output:1
- cookie=0x100000000, duration=?s, table_id=0, n_packets=0, n_bytes=0, tun_id=0x1 actions=mod_vlan_vid:4
 NXST_FLOW reply:
 ])
 AT_CHECK([ovs-ofctl del-flows br0])
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 466ade6b5..101471082 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -28,12 +28,11 @@ OFPT_FLOW_MOD: ADD tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:
 OFPT_FLOW_MOD: ADD udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1
 OFPT_FLOW_MOD: ADD priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535
 OFPT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00
-NXT_TUN_ID_FROM_COOKIE: set=1
-OFPT_FLOW_MOD: ADD cookie:0x123400005678 actions=FLOOD
-OFPT_FLOW_MOD: ADD actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel64:0x123456789
-OFPT_FLOW_MOD: ADD actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0..3]),multipath(symmetric_l4,1024,iter_hash,5000,5050,NXM_NX_REG0[0..12])
-OFPT_FLOW_MOD: ADD actions=drop
 NXT_SET_FLOW_FORMAT: format=nxm
+NXT_FLOW_MOD: ADD tun_id=0x1234 cookie:0x5678 actions=FLOOD
+NXT_FLOW_MOD: ADD actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel64:0x123456789
+NXT_FLOW_MOD: ADD actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0..3]),multipath(symmetric_l4,1024,iter_hash,5000,5050,NXM_NX_REG0[0..12])
+NXT_FLOW_MOD: ADD actions=drop
 NXT_FLOW_MOD: ADD tun_id=0x1234000056780000/0xffff0000ffff0000 actions=drop
 ]])
 AT_CHECK([sed 's/.*|//' stderr], [0], [dnl
@@ -46,12 +45,6 @@ post: wildcards=  0x3fffff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:0
 normalization changed ofp_match, details:
  pre: wildcards=  0x3820ff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
 post: wildcards=  0x3fffff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
-normalization changed ofp_match, details:
- pre: wildcards=  0x3820ff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
-post: wildcards=  0x3fffff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
-normalization changed ofp_match, details:
- pre: wildcards= 0x23820ff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
-post: wildcards= 0x23fffff  in_port=65534  dl_src=00:00:00:00:00:00  dl_dst=00:00:00:00:00:00  dl_vlan=    0  dl_vlan_pcp=  0  dl_type=     0  nw_tos=   0  nw_proto=   0  nw_src=         0  nw_dst=         0  tp_src=    0  tp_dst=    0
 ])
 AT_CLEANUP
 
@@ -469,7 +462,7 @@ dnl Check that "-F openflow10" rejects a flow_mod with a tun_id, since
 dnl OpenFlow 1.0 doesn't support tunnels.
 AT_SETUP([ovs-ofctl -F option and tun_id])
 AT_CHECK([ovs-ofctl -F openflow10 add-flow dummy tun_id=123,actions=drop],
-  [1], [], [ovs-ofctl: flow cannot be expressed in flow format openflow10 (flow format tun_id_from_cookie or better is required)
+  [1], [], [ovs-ofctl: flow cannot be expressed in flow format openflow10 (flow format nxm or better is required)
 ])
 AT_CLEANUP
 
diff --git a/tests/test-flows.c b/tests/test-flows.c
index 2edfa35fe..559af3a31 100644
--- a/tests/test-flows.c
+++ b/tests/test-flows.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,8 +70,7 @@ main(int argc OVS_UNUSED, char *argv[])
 
         flow_extract(packet, 0, 1, &flow);
         cls_rule_init_exact(&flow, 0, &rule);
-        ofputil_cls_rule_to_match(&rule, NXFF_OPENFLOW10, &extracted_match,
-                                  0, NULL);
+        ofputil_cls_rule_to_match(&rule, &extracted_match);
 
         if (memcmp(&expected_match, &extracted_match, sizeof expected_match)) {
             char *exp_s = ofp_match_to_string(&expected_match, 2);
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index d9742c8ec..c44de463b 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -764,11 +764,6 @@ increasing capability:
 This is the standard OpenFlow 1.0 flow format.  It should be supported
 by all OpenFlow switches.
 .
-.IP "\fBtun_id_from_cookie\fR"
-This Nicira extension to OpenFlow adds minimal and limited support for
-\fBtun_id\fR, but it does not support any other Nicira flow
-extensions.  (This flow format is deprecated.)
-.
 .IP "\fBnxm\fR (Nicira Extended Match)"
 This Nicira extension to OpenFlow is flexible and extensible.  It
 supports all of the Nicira flow extensions, such as \fBtun_id\fR and
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index bc8cc6bd8..c0ff4dc12 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -515,8 +515,6 @@ negotiate_highest_flow_format(struct vconn *vconn,
 
         if (try_set_flow_format(vconn, NXFF_NXM)) {
             flow_format = NXFF_NXM;
-        } else if (try_set_flow_format(vconn, NXFF_TUN_ID_FROM_COOKIE)) {
-            flow_format = NXFF_TUN_ID_FROM_COOKIE;
         } else {
             flow_format = NXFF_OPENFLOW10;
         }
@@ -544,7 +542,7 @@ do_dump_flows__(int argc, char *argv[], bool aggregate)
     parse_ofp_flow_stats_request_str(&fsr, aggregate, argc > 2 ? argv[2] : "");
 
     open_vconn(argv[1], &vconn);
-    min_flow_format = ofputil_min_flow_format(&fsr.match, false, 0);
+    min_flow_format = ofputil_min_flow_format(&fsr.match);
     flow_format = negotiate_highest_flow_format(vconn, min_flow_format);
     request = ofputil_encode_flow_stats_request(&fsr, flow_format);
     dump_stats_transaction(argv[1], request);
@@ -1056,7 +1054,7 @@ read_flows_from_file(const char *filename, struct classifier *cls, int index)
         version->n_actions = actions.size / sizeof *version->actions;
         version->actions = ofpbuf_steal_data(&actions);
 
-        min_ff = ofputil_min_flow_format(&fm.cr, true, fm.cookie);
+        min_ff = ofputil_min_flow_format(&fm.cr);
         min_flow_format = MAX(min_flow_format, min_ff);
         check_final_format_for_flow_mod(min_flow_format);
 
@@ -1122,8 +1120,7 @@ read_flows_from_switch(struct vconn *vconn, enum nx_flow_format flow_format,
                 struct ofputil_flow_stats fs;
                 int retval;
 
-                retval = ofputil_decode_flow_stats_reply(&fs, reply,
-                                                         flow_format);
+                retval = ofputil_decode_flow_stats_reply(&fs, reply);
                 if (retval) {
                     if (retval != EOF) {
                         ovs_fatal(0, "parse error in reply");

From 84b32864207813c5f4b9989f66303a577cb01ba0 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Mon, 25 Apr 2011 16:06:54 -0700
Subject: [PATCH 02/40] bridge: Create new "null" interface type.

Null interfaces are completely ignored by Open vSwitch.
---
 vswitchd/bridge.c    | 21 +++++++++++++--------
 vswitchd/vswitch.xml |  2 ++
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 20ecca356..84ed03e80 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -2951,12 +2951,12 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
 
     port->cfg = cfg;
 
-
     /* Add new interfaces and update 'cfg' member of existing ones. */
     sset_init(&new_ifaces);
     for (i = 0; i < cfg->n_interfaces; i++) {
         const struct ovsrec_interface *if_cfg = cfg->interfaces[i];
         struct iface *iface;
+        const char *type;
 
         if (!sset_add(&new_ifaces, if_cfg->name)) {
             VLOG_WARN("port %s: %s specified twice as port interface",
@@ -2965,8 +2965,18 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
             continue;
         }
 
+        /* Determine interface type.  The local port always has type
+         * "internal".  Other ports take their type from the database and
+         * default to "system" if none is specified. */
+        type = (!strcmp(if_cfg->name, port->bridge->name) ? "internal"
+                : if_cfg->type[0] ? if_cfg->type
+                : "system");
+
         iface = iface_lookup(port->bridge, if_cfg->name);
-        if (iface) {
+        if (!strcmp(type, "null")) {
+            iface_destroy(iface);
+            continue;
+        } else if (iface) {
             if (iface->port != port) {
                 VLOG_ERR("bridge %s: %s interface is on multiple ports, "
                          "removing from %s",
@@ -2978,12 +2988,7 @@ port_reconfigure(struct port *port, const struct ovsrec_port *cfg)
             iface = iface_create(port, if_cfg);
         }
 
-        /* Determine interface type.  The local port always has type
-         * "internal".  Other ports take their type from the database and
-         * default to "system" if none is specified. */
-        iface->type = (!strcmp(if_cfg->name, port->bridge->name) ? "internal"
-                       : if_cfg->type[0] ? if_cfg->type
-                       : "system");
+        iface->type = type;
     }
     sset_destroy(&new_ifaces);
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 4cdc1b1ca..ce00cbd70 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1047,6 +1047,8 @@
               </dd>
             </dl>
           </dd>
+          <dt><code>null</code></dt>
+          <dd>An ignored interface.</dd>
         </dl>
       </column>
 

From 672d18b29644302efa9fbc42a88cde80237d6dd8 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Thu, 21 Apr 2011 13:55:45 -0700
Subject: [PATCH 03/40] bond: New bond-hash-basis setting.

---
 lib/bond.c                 | 40 ++++++++++++++++++++++++++++----------
 lib/bond.h                 |  1 +
 vswitchd/bridge.c          |  2 ++
 vswitchd/ovs-vswitchd.8.in |  4 ++--
 vswitchd/vswitch.xml       |  4 ++++
 5 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/lib/bond.c b/lib/bond.c
index ed6ed89b8..d7242fae2 100644
--- a/lib/bond.c
+++ b/lib/bond.c
@@ -93,6 +93,7 @@ struct bond {
     int updelay, downdelay;     /* Delay before slave goes up/down, in ms. */
     bool lacp_negotiated;       /* LACP negotiations were successful. */
     bool bond_revalidate;       /* True if flows need revalidation. */
+    uint32_t basis;             /* Basis for flow hash function. */
 
     /* SLB specific bonding info. */
     struct bond_entry *hash;     /* An array of (BOND_MASK + 1) elements. */
@@ -129,8 +130,9 @@ static void bond_link_status_update(struct bond_slave *, struct tag_set *);
 static void bond_choose_active_slave(struct bond *, struct tag_set *);
 static bool bond_is_tcp_hash(const struct bond *);
 static unsigned int bond_hash_src(const uint8_t mac[ETH_ADDR_LEN],
-                                  uint16_t vlan);
-static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan);
+                                  uint16_t vlan, uint32_t basis);
+static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan,
+                                  uint32_t basis);
 static struct bond_entry *lookup_bond_entry(const struct bond *,
                                             const struct flow *,
                                             uint16_t vlan);
@@ -291,6 +293,11 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
         revalidate = true;
     }
 
+    if (bond->basis != s->basis) {
+        bond->basis = s->basis;
+        revalidate = true;
+    }
+
     if (bond->detect == BLSM_CARRIER) {
         struct bond_slave *slave;
 
@@ -978,6 +985,8 @@ bond_unixctl_show(struct unixctl_conn *conn,
                       bond_is_tcp_hash(bond) ? "balance-tcp" : "balance-slb");
     }
 
+    ds_put_format(&ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
+
     ds_put_format(&ds, "bond-detect-mode: %s\n",
                   bond->monitor ? "carrier" : "miimon");
 
@@ -1206,11 +1215,13 @@ bond_unixctl_hash(struct unixctl_conn *conn, const char *args_,
     uint8_t hash;
     char *hash_cstr;
     unsigned int vlan;
-    char *mac_s, *vlan_s;
+    uint32_t basis;
+    char *mac_s, *vlan_s, *basis_s;
     char *save_ptr = NULL;
 
     mac_s  = strtok_r(args, " ", &save_ptr);
     vlan_s = strtok_r(NULL, " ", &save_ptr);
+    basis_s = strtok_r(NULL, " ", &save_ptr);
 
     if (vlan_s) {
         if (sscanf(vlan_s, "%u", &vlan) != 1) {
@@ -1221,9 +1232,18 @@ bond_unixctl_hash(struct unixctl_conn *conn, const char *args_,
         vlan = OFP_VLAN_NONE;
     }
 
+    if (basis_s) {
+        if (sscanf(basis_s, "%"PRIu32, &basis) != 1) {
+            unixctl_command_reply(conn, 501, "invalid basis");
+            return;
+        }
+    } else {
+        basis = 0;
+    }
+
     if (sscanf(mac_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))
         == ETH_ADDR_SCAN_COUNT) {
-        hash = bond_hash_src(mac, vlan) & BOND_MASK;
+        hash = bond_hash_src(mac, vlan, basis) & BOND_MASK;
 
         hash_cstr = xasprintf("%u", hash);
         unixctl_command_reply(conn, 200, hash_cstr);
@@ -1355,13 +1375,13 @@ bond_is_tcp_hash(const struct bond *bond)
 }
 
 static unsigned int
-bond_hash_src(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
+bond_hash_src(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan, uint32_t basis)
 {
-    return hash_bytes(mac, ETH_ADDR_LEN, vlan);
+    return hash_3words(hash_bytes(mac, ETH_ADDR_LEN, 0), vlan, basis);
 }
 
 static unsigned int
-bond_hash_tcp(const struct flow *flow, uint16_t vlan)
+bond_hash_tcp(const struct flow *flow, uint16_t vlan, uint32_t basis)
 {
     struct flow hash_flow = *flow;
     hash_flow.vlan_tci = vlan;
@@ -1369,7 +1389,7 @@ bond_hash_tcp(const struct flow *flow, uint16_t vlan)
     /* The symmetric quality of this hash function is not required, but
      * flow_hash_symmetric_l4 already exists, and is sufficient for our
      * purposes, so we use it out of convenience. */
-    return flow_hash_symmetric_l4(&hash_flow, 0);
+    return flow_hash_symmetric_l4(&hash_flow, basis);
 }
 
 static unsigned int
@@ -1378,8 +1398,8 @@ bond_hash(const struct bond *bond, const struct flow *flow, uint16_t vlan)
     assert(bond->balance != BM_AB);
 
     return (bond_is_tcp_hash(bond)
-            ? bond_hash_tcp(flow, vlan)
-            : bond_hash_src(flow->dl_src, vlan));
+            ? bond_hash_tcp(flow, vlan, bond->basis)
+            : bond_hash_src(flow->dl_src, vlan, bond->basis));
 }
 
 static struct bond_entry *
diff --git a/lib/bond.h b/lib/bond.h
index b2ab89cd8..5847abc10 100644
--- a/lib/bond.h
+++ b/lib/bond.h
@@ -50,6 +50,7 @@ const char *bond_detect_mode_to_string(enum bond_detect_mode);
 /* Configuration for a bond as a whole. */
 struct bond_settings {
     char *name;                 /* Bond's name, for log messages. */
+    uint32_t basis;             /* Flow hashing basis. */
 
     /* Balancing configuration. */
     enum bond_mode balance;
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 84ed03e80..dee19a9bb 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3258,6 +3258,8 @@ port_reconfigure_bond(struct port *port)
 
     s.up_delay = MAX(0, port->cfg->bond_updelay);
     s.down_delay = MAX(0, port->cfg->bond_downdelay);
+    s.basis = atoi(get_port_other_config(port->cfg, "bond-hash-basis", "0"));
+
     s.rebalance_interval = atoi(
         get_port_other_config(port->cfg, "bond-rebalance-interval", "10000"));
     if (s.rebalance_interval < 1000) {
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index 4c02f8a4b..488257403 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -182,9 +182,9 @@ updelay (or downdelay).
 .IP
 This setting is not permanent: it persists only until the carrier
 status of \fIslave\fR changes.
-.IP "\fBbond/hash\fR \fImac\fR [\fIvlan\fR]"
+.IP "\fBbond/hash\fR \fImac\fR [\fIvlan\fR] [\fIbasis\fR]"
 Returns the hash value which would be used for \fImac\fR with \fIvlan\fR
-if specified.
+and \fIbasis\fR if specified.
 .
 .IP "\fBlacp/show\fR \fIport\fR"
 Lists all of the LACP related information about the given \fIport\fR:
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index ce00cbd70..99f4fc40f 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -647,6 +647,10 @@
           <dd> The number of milliseconds between successive attempts to
             poll each interface's MII.  Only relevant on ports which use
             <code>miimon</code> to detect failures. </dd>
+          <dt><code>bond-hash-basis</code></dt>
+          <dd> An integer hashed along with flows when choosing output slaves.
+            When changed, all flows will be assigned different hash values
+            possibly causing slave selection decisions to change.</dd>
           <dt><code>lacp-system-id</code></dt>
           <dd> The LACP system ID of this <ref table="Port"/>.  The system ID
             of a LACP bond is used to identify itself to its partners.  Must

From 76ea8efd442a403f8e874cd4103dafe2479b6a57 Mon Sep 17 00:00:00 2001
From: Andrew Evans <aevans@nicira.com>
Date: Tue, 26 Apr 2011 13:58:35 -0700
Subject: [PATCH 04/40] vswitch.xml: s/switchs/switches/g

---
 vswitchd/vswitch.xml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 99f4fc40f..0ec1fd3c9 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -577,8 +577,8 @@
 
       <column name="lacp">
         <p>Configures LACP on this port.  LACP allows directly connected
-          switchs to negotiate which links may be bonded.  LACP may be enabled
-          on non-bonded ports for the benefit of any switchs they may be
+          switches to negotiate which links may be bonded.  LACP may be enabled
+          on non-bonded ports for the benefit of any switches they may be
           connected to.  <code>active</code> ports are allowed to initiate LACP
           negotiations.  <code>passive</code> ports are allowed to participate
           in LACP negotiations initiated by a remote switch, but not allowed to
@@ -682,7 +682,7 @@
           <dd> When <code>true</code>, configures this <ref table="Port"/> to
             require successful LACP negotiations to enable any slaves.
             Defaults to <code>false</code> which safely allows LACP to be used
-            with switchs that do not support the protocol.</dd>
+            with switches that do not support the protocol.</dd>
         </dl>
       </column>
     </group>

From 3112cfa4084eb5d4bacbced1295c0e14c5f27743 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 26 Apr 2011 17:28:44 -0700
Subject: [PATCH 05/40] connmgr: Remove unused variable.

Fixes the following warning:

ofproto/connmgr.c:396:11: error: variable 'ss_exists' set but not
used [-Werror=unused-but-set-variable]
---
 ofproto/connmgr.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c
index 83b3159e8..a76dc8e80 100644
--- a/ofproto/connmgr.c
+++ b/ofproto/connmgr.c
@@ -393,7 +393,6 @@ connmgr_set_controllers(struct connmgr *mgr,
     struct shash new_controllers;
     struct ofconn *ofconn, *next_ofconn;
     struct ofservice *ofservice, *next_ofservice;
-    bool ss_exists;
     size_t i;
 
     /* Create newly configured controllers and services.
@@ -421,7 +420,6 @@ connmgr_set_controllers(struct connmgr *mgr,
 
     /* Delete controllers that are no longer configured.
      * Update configuration of all now-existing controllers. */
-    ss_exists = false;
     HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, hmap_node, &mgr->controllers) {
         struct ofproto_controller *c;
 

From d4f15cb982ff772110509c4292697bbe8a68d0f4 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 26 Apr 2011 17:32:41 -0700
Subject: [PATCH 06/40] ofproto: Remove unused variable.

Fixes the following warning:

ofproto/ofproto.c:4176:30: error: variable 'pin' set but not used
[-Werror=unused-but-set-variable]
---
 ofproto/ofproto.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 7eef93a17..445f0d2bd 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -4173,13 +4173,6 @@ static void
 send_packet_in(struct ofproto *ofproto, struct dpif_upcall *upcall,
                const struct flow *flow, bool clone)
 {
-    struct ofputil_packet_in pin;
-
-    pin.packet = upcall->packet;
-    pin.in_port = odp_port_to_ofp_port(flow->in_port);
-    pin.reason = upcall->type == DPIF_UC_MISS ? OFPR_NO_MATCH : OFPR_ACTION;
-    pin.buffer_id = 0;          /* not yet known */
-    pin.send_len = upcall->userdata;
     connmgr_send_packet_in(ofproto->connmgr, upcall, flow,
                            clone ? NULL : upcall->packet);
 }

From da2f7b8ff5f41470cd1238376fbad5a4b75ad1a0 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 26 Apr 2011 15:05:19 -0700
Subject: [PATCH 07/40] lacp: New other_config setting
 "lacp-force-aggregatable".

In some extremely advanced situations, one may want to force
non-bondable slaves to advertise themselves as bondable.  This
patch adds that capability.

Also includes some minor code cleanup.
---
 lib/lacp.c           | 17 ++++++++++-------
 lib/lacp.h           |  1 +
 vswitchd/bridge.c    |  4 ++++
 vswitchd/vswitch.xml |  4 ++++
 4 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index a7f66a24a..21d329eb0 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -53,6 +53,7 @@ struct lacp {
     bool strict;             /* True if in strict mode. */
     bool negotiated;         /* True if LACP negotiations were successful. */
     bool update;             /* True if lacp_update() needs to be called. */
+    bool force_agg;          /* Forces LACP_STATE_AGG bit on all slaves. */
 };
 
 struct slave {
@@ -190,6 +191,7 @@ lacp_configure(struct lacp *lacp, const struct lacp_settings *s)
 
     lacp->active = s->active;
     lacp->lacp_time = s->lacp_time;
+    lacp->force_agg = s->force_agg;
     lacp->custom_time = MAX(TIME_UPDATE_INTERVAL, s->custom_time);
 }
 
@@ -525,13 +527,14 @@ slave_set_expired(struct slave *slave)
 static void
 slave_get_actor(struct slave *slave, struct lacp_info *actor)
 {
+    struct lacp *lacp = slave->lacp;
     uint8_t state = 0;
 
-    if (slave->lacp->active) {
+    if (lacp->active) {
         state |= LACP_STATE_ACT;
     }
 
-    if (slave->lacp->lacp_time != LACP_TIME_SLOW) {
+    if (lacp->lacp_time != LACP_TIME_SLOW) {
         state |= LACP_STATE_TIME;
     }
 
@@ -547,20 +550,20 @@ slave_get_actor(struct slave *slave, struct lacp_info *actor)
         state |= LACP_STATE_EXP;
     }
 
-    if (hmap_count(&slave->lacp->slaves) > 1) {
+    if (lacp->force_agg || hmap_count(&lacp->slaves) > 1) {
         state |= LACP_STATE_AGG;
     }
 
-    if (slave->attached || !slave->lacp->negotiated) {
+    if (slave->attached || !lacp->negotiated) {
         state |= LACP_STATE_COL | LACP_STATE_DIST;
     }
 
     actor->state = state;
-    actor->key = htons(slave->lacp->key_slave->port_id);
+    actor->key = htons(lacp->key_slave->port_id);
     actor->port_priority = htons(slave->port_priority);
     actor->port_id = htons(slave->port_id);
-    actor->sys_priority = htons(slave->lacp->sys_priority);
-    memcpy(&actor->sys_id, slave->lacp->sys_id, ETH_ADDR_LEN);
+    actor->sys_priority = htons(lacp->sys_priority);
+    memcpy(&actor->sys_id, lacp->sys_id, ETH_ADDR_LEN);
 }
 
 /* Given 'slave', populates 'priority' with data representing its LACP link
diff --git a/lib/lacp.h b/lib/lacp.h
index 396569205..871056c4d 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -89,6 +89,7 @@ struct lacp_settings {
     enum lacp_time lacp_time;
     long long int custom_time;
     bool strict;
+    bool force_agg;
 };
 
 void lacp_init(void);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index dee19a9bb..270ab800d 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3193,6 +3193,10 @@ port_reconfigure_lacp(struct port *port)
                                              "false"),
                        "true");
 
+    s.force_agg = !strcmp(get_port_other_config(port->cfg,
+                                                "lacp-force-aggregatable",
+                                                "false"), "true");
+
     lacp_time = get_port_other_config(port->cfg, "lacp-time", "slow");
     custom_time = atoi(lacp_time);
     if (!strcmp(lacp_time, "fast")) {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 0ec1fd3c9..e75483559 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -683,6 +683,10 @@
             require successful LACP negotiations to enable any slaves.
             Defaults to <code>false</code> which safely allows LACP to be used
             with switches that do not support the protocol.</dd>
+          <dt><code>lacp-force-aggregatable</code></dt>
+          <dd> When <code>true</code>, forces all slaves managed by this
+            <ref table="Port"/> to advertise themselves as aggregatable even if
+            they normally wouldn't.  Defaults to <code>false</code>.</dd>
         </dl>
       </column>
     </group>

From e1ce3f2dccb027ef5ebe6035ab4f6a71de4ccf1c Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Tue, 26 Apr 2011 15:39:58 -0700
Subject: [PATCH 08/40] lacp: Allow configurable aggregation keys.

Users will the ability to manually set aggregation keys on a
per-slave basis in order to use some of the more advanced LACP
features.  Most notably, LACP controlled active-backup bonding
requires fine grained aggregation key configuration.
---
 lib/lacp.c           | 14 ++++++++++++--
 lib/lacp.h           |  1 +
 vswitchd/bridge.c    |  9 ++++++++-
 vswitchd/vswitch.xml |  5 +++++
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index 21d329eb0..87f70bf77 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -63,6 +63,7 @@ struct slave {
     struct lacp *lacp;            /* LACP object containing this slave. */
     uint16_t port_id;             /* Port ID. */
     uint16_t port_priority;       /* Port Priority. */
+    uint16_t key;                 /* Aggregation Key. 0 if default. */
     char *name;                   /* Name of this slave. */
 
     enum slave_status status;     /* Slave status. */
@@ -274,9 +275,12 @@ lacp_slave_register(struct lacp *lacp, void *slave_,
         slave->name = xstrdup(s->name);
     }
 
-    if (slave->port_id != s->id || slave->port_priority != s->priority) {
+    if (slave->port_id != s->id
+        || slave->port_priority != s->priority
+        || slave->key != s->key) {
         slave->port_id = s->id;
         slave->port_priority = s->priority;
+        slave->key = s->key;
 
         lacp->update = true;
 
@@ -528,6 +532,7 @@ static void
 slave_get_actor(struct slave *slave, struct lacp_info *actor)
 {
     struct lacp *lacp = slave->lacp;
+    uint16_t key;
     uint8_t state = 0;
 
     if (lacp->active) {
@@ -558,8 +563,13 @@ slave_get_actor(struct slave *slave, struct lacp_info *actor)
         state |= LACP_STATE_COL | LACP_STATE_DIST;
     }
 
+    key = lacp->key_slave->key;
+    if (!key) {
+        key = lacp->key_slave->port_id;
+    }
+
     actor->state = state;
-    actor->key = htons(lacp->key_slave->port_id);
+    actor->key = htons(key);
     actor->port_priority = htons(slave->port_priority);
     actor->port_id = htons(slave->port_id);
     actor->sys_priority = htons(lacp->sys_priority);
diff --git a/lib/lacp.h b/lib/lacp.h
index 871056c4d..fb91b4f14 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -107,6 +107,7 @@ struct lacp_slave_settings {
     char *name;
     uint16_t id;
     uint16_t priority;
+    uint16_t key;
 };
 
 void lacp_slave_register(struct lacp *, void *slave_,
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 270ab800d..b2dc55fb8 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3136,11 +3136,13 @@ static void
 iface_reconfigure_lacp(struct iface *iface)
 {
     struct lacp_slave_settings s;
-    int priority, portid;
+    int priority, portid, key;
 
     portid = atoi(get_interface_other_config(iface->cfg, "lacp-port-id", "0"));
     priority = atoi(get_interface_other_config(iface->cfg,
                                                "lacp-port-priority", "0"));
+    key = atoi(get_interface_other_config(iface->cfg, "lacp-aggregation-key",
+                                          "0"));
 
     if (portid <= 0 || portid > UINT16_MAX) {
         portid = iface->dp_ifidx;
@@ -3150,9 +3152,14 @@ iface_reconfigure_lacp(struct iface *iface)
         priority = UINT16_MAX;
     }
 
+    if (key < 0 || key > UINT16_MAX) {
+        key = 0;
+    }
+
     s.name = iface->name;
     s.id = portid;
     s.priority = priority;
+    s.key = key;
     lacp_slave_register(iface->port->lacp, iface, &s);
 }
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index e75483559..48315fa9c 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1301,6 +1301,11 @@
             LACP negotiations <ref table="Interface"/>s with numerically lower
             priorities are preferred for aggregation.  Must be a number between
             1 and 65535.</dd>
+          <dt><code>lacp-aggregation-key</code></dt>
+          <dd> The LACP aggregation key of this <ref table="Interface"/>.
+            <ref table="Interface"/>s with different aggregation keys may not
+            be active within a given <ref table="Port"/> at the same time. Must
+            be a number between 1 and 65535.</dd>
         </dl>
       </column>
 

From 530af29c41eb04d355e400871fa58f39002315de Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Thu, 14 Apr 2011 13:10:09 -0700
Subject: [PATCH 09/40] datapath: Backport DIV_ROUND_UP.

Older kernels didn't define DIV_ROUND_UP, so this provides a
backported version.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 datapath/linux-2.6/compat-2.6/include/linux/kernel.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
index 2c2220b9f..4af88516e 100644
--- a/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
+++ b/datapath/linux-2.6/compat-2.6/include/linux/kernel.h
@@ -54,4 +54,8 @@
 #define SHRT_MIN	((s16)(-SHRT_MAX - 1))
 #endif
 
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+
 #endif /* linux/kernel.h */

From 7adfd7bfd18c84d992fd443499c6df4cabc0749f Mon Sep 17 00:00:00 2001
From: Andrew Evans <aevans@nicira.com>
Date: Wed, 27 Apr 2011 18:58:16 -0700
Subject: [PATCH 10/40] datapath: Make git ignore linux-2.6/vlan.c.

Signed-off-by: Andrew Evans <aevans@nicira.com>
---
 datapath/linux-2.6/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath/linux-2.6/.gitignore b/datapath/linux-2.6/.gitignore
index f5edd503f..e2a0cb7a2 100644
--- a/datapath/linux-2.6/.gitignore
+++ b/datapath/linux-2.6/.gitignore
@@ -30,6 +30,7 @@
 /time.c
 /tmp
 /tunnel.c
+/vlan.c
 /vport-capwap.c
 /vport-generic.c
 /vport-gre.c

From 73976ebdb054d1a5a2fedb925304b5e0956c20d1 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Wed, 27 Apr 2011 08:46:38 -0700
Subject: [PATCH 11/40] ovs-monitor-ipsec: Allow IKE fragmentation

Some (broken) firewalls do not properly pass UDP fragments, which will
prevent IKE from completing.  This commit enables the racoon option to
allow application-level fragmenting and allow security associations to
be created.
---
 debian/ovs-monitor-ipsec | 1 +
 1 file changed, 1 insertion(+)

diff --git a/debian/ovs-monitor-ipsec b/debian/ovs-monitor-ipsec
index febd5691d..0a97c88dc 100755
--- a/debian/ovs-monitor-ipsec
+++ b/debian/ovs-monitor-ipsec
@@ -83,6 +83,7 @@ path certificate "%s";
     cert_entry = """remote %s {
         exchange_mode main;
         nat_traversal on;
+        ike_frag on;
         certificate_type x509 "%s" "%s";
         my_identifier asn1dn;
         peers_identifier asn1dn;

From ebca7787e50327716e2c2ea4a7cc2656fd4e9fd5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 28 Apr 2011 16:13:39 -0700
Subject: [PATCH 12/40] datapath: No need to zero cb anymore in
 odp_packet_cmd_execute().

Before commit 3f19d399f "datapath: Fix mysterious GRE-over-IPSEC problems,"
'packet' in opd_packet_cmd_execute() was an skb cloned from one created by
Netlink, so its cb member wasn't necessarily zeroed.  But that commit
changed 'packet' to be freshly allocated with __dev_alloc_skb(), which
means that cb is zeroed, so we don't have to do it again.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
---
 datapath/datapath.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 5ce77cd34..20ae4d82e 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -714,9 +714,6 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	else
 		packet->protocol = htons(ETH_P_802_2);
 
-	/* Initialize OVS_CB (it came from Netlink so might not be zeroed). */
-	memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb));
-
 	err = flow_extract(packet, -1, &key, &is_frag);
 	if (err)
 		goto err_kfree_skb;

From 6f04e94a264edae76aa2bb44a9019e19b29d47a1 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 28 Apr 2011 16:34:56 -0700
Subject: [PATCH 13/40] datapath: Avoid freeing wild pointer in corner case.

In odp_flow_cmd_new_or_set(), if flow_actions_alloc() fails in the "new
flow" case, then flow_put() will kfree() the new flow's 'sf_acts' pointer,
but nothing has initialized that pointer.  Initialize the pointer to NULL
to avoid the problem.

Found by inspection.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
---
 datapath/flow.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath/flow.c b/datapath/flow.c
index f264866a7..d670925af 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -196,6 +196,7 @@ struct sw_flow *flow_alloc(void)
 
 	spin_lock_init(&flow->lock);
 	atomic_set(&flow->refcnt, 1);
+	flow->sf_acts = NULL;
 	flow->dead = false;
 
 	return flow;

From e0e57990f6d59db44e47b3991b8bea7392b9f30b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 28 Apr 2011 16:54:07 -0700
Subject: [PATCH 14/40] datapath: Make every packet passing through the
 datapath have an sw_flow.

This way, it's always possible to get a packet's key or hash simply by
looking at its 'flow', without considering whether the packet came from
userspace or from a vport.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
---
 datapath/datapath.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 20ae4d82e..46dc7379c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -677,8 +677,9 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 {
 	struct odp_header *odp_header = info->userhdr;
 	struct nlattr **a = info->attrs;
+	struct sw_flow_actions *acts;
 	struct sk_buff *packet;
-	struct sw_flow_key key;
+	struct sw_flow *flow;
 	struct datapath *dp;
 	struct ethhdr *eth;
 	bool is_frag;
@@ -714,23 +715,42 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	else
 		packet->protocol = htons(ETH_P_802_2);
 
-	err = flow_extract(packet, -1, &key, &is_frag);
-	if (err)
+	/* Build an sw_flow for sending this packet. */
+	flow = flow_alloc();
+	err = PTR_ERR(flow);
+	if (IS_ERR(flow))
 		goto err_kfree_skb;
 
+	err = flow_extract(packet, -1, &flow->key, &is_frag);
+	if (err)
+		goto err_flow_put;
+	flow->tbl_node.hash = flow_hash(&flow->key);
+
+	acts = flow_actions_alloc(a[ODP_PACKET_ATTR_ACTIONS]);
+	err = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_flow_put;
+	rcu_assign_pointer(flow->sf_acts, acts);
+
+	OVS_CB(packet)->flow = flow;
+
 	rcu_read_lock();
 	dp = get_dp(odp_header->dp_ifindex);
 	err = -ENODEV;
 	if (!dp)
 		goto err_unlock;
-	err = execute_actions(dp, packet, &key,
+	err = execute_actions(dp, packet, &flow->key,
 			      nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
 			      nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
 	rcu_read_unlock();
+
+	flow_put(flow);
 	return err;
 
 err_unlock:
 	rcu_read_unlock();
+err_flow_put:
+	flow_put(flow);
 err_kfree_skb:
 	kfree_skb(packet);
 err:

From a4af24751b4127ae0c5cb25262b4069a7c0842ae Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Fri, 29 Apr 2011 10:49:06 -0700
Subject: [PATCH 15/40] datapath: Drop parameters from execute_actions().

It's (almost) always easier to understand a function with fewer parameters,
so this removes the now-redundant sw_flow_key and actions parameters from
execute_actions(), since they can be found through OVS_CB(skb)->flow now.

This also necessarily moves loop detection into execute_actions().
Otherwise, the flow's actions could have changed between the time that the
loop was detected and the time that it was suppressed, which would mean
that the wrong (version of the) flow would get suppressed.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
---
 datapath/actions.c      | 107 +++++++++++++++++++++++-----------------
 datapath/actions.h      |   6 +--
 datapath/datapath.c     |  34 +------------
 datapath/loop_counter.c |   5 +-
 datapath/loop_counter.h |   4 +-
 5 files changed, 70 insertions(+), 86 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index a2a572e3f..b6b713534 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -23,13 +23,13 @@
 #include "actions.h"
 #include "checksum.h"
 #include "datapath.h"
+#include "loop_counter.h"
 #include "openvswitch/datapath-protocol.h"
 #include "vlan.h"
 #include "vport.h"
 
 static int do_execute_actions(struct datapath *, struct sk_buff *,
-			      const struct sw_flow_key *,
-			      const struct nlattr *actions, u32 actions_len);
+			      struct sw_flow_actions *acts);
 
 static struct sk_buff *make_writable(struct sk_buff *skb, unsigned min_headroom)
 {
@@ -112,35 +112,34 @@ static struct sk_buff *modify_vlan_tci(struct sk_buff *skb, __be16 tci)
 	return skb;
 }
 
-static bool is_ip(struct sk_buff *skb, const struct sw_flow_key *key)
+static bool is_ip(struct sk_buff *skb)
 {
-	return (key->dl_type == htons(ETH_P_IP) &&
+	return (OVS_CB(skb)->flow->key.dl_type == htons(ETH_P_IP) &&
 		skb->transport_header > skb->network_header);
 }
 
-static __sum16 *get_l4_checksum(struct sk_buff *skb, const struct sw_flow_key *key)
+static __sum16 *get_l4_checksum(struct sk_buff *skb)
 {
+	u8 nw_proto = OVS_CB(skb)->flow->key.nw_proto;
 	int transport_len = skb->len - skb_transport_offset(skb);
-	if (key->nw_proto == IPPROTO_TCP) {
+	if (nw_proto == IPPROTO_TCP) {
 		if (likely(transport_len >= sizeof(struct tcphdr)))
 			return &tcp_hdr(skb)->check;
-	} else if (key->nw_proto == IPPROTO_UDP) {
+	} else if (nw_proto == IPPROTO_UDP) {
 		if (likely(transport_len >= sizeof(struct udphdr)))
 			return &udp_hdr(skb)->check;
 	}
 	return NULL;
 }
 
-static struct sk_buff *set_nw_addr(struct sk_buff *skb,
-				   const struct sw_flow_key *key,
-				   const struct nlattr *a)
+static struct sk_buff *set_nw_addr(struct sk_buff *skb, const struct nlattr *a)
 {
 	__be32 new_nwaddr = nla_get_be32(a);
 	struct iphdr *nh;
 	__sum16 *check;
 	__be32 *nwaddr;
 
-	if (unlikely(!is_ip(skb, key)))
+	if (unlikely(!is_ip(skb)))
 		return skb;
 
 	skb = make_writable(skb, 0);
@@ -150,7 +149,7 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 	nh = ip_hdr(skb);
 	nwaddr = nla_type(a) == ODP_ACTION_ATTR_SET_NW_SRC ? &nh->saddr : &nh->daddr;
 
-	check = get_l4_checksum(skb, key);
+	check = get_l4_checksum(skb);
 	if (likely(check))
 		inet_proto_csum_replace4(check, skb, *nwaddr, new_nwaddr, 1);
 	csum_replace4(&nh->check, *nwaddr, new_nwaddr);
@@ -162,11 +161,9 @@ static struct sk_buff *set_nw_addr(struct sk_buff *skb,
 	return skb;
 }
 
-static struct sk_buff *set_nw_tos(struct sk_buff *skb,
-				  const struct sw_flow_key *key,
-				  u8 nw_tos)
+static struct sk_buff *set_nw_tos(struct sk_buff *skb, u8 nw_tos)
 {
-	if (unlikely(!is_ip(skb, key)))
+	if (unlikely(!is_ip(skb)))
 		return skb;
 
 	skb = make_writable(skb, 0);
@@ -185,15 +182,13 @@ static struct sk_buff *set_nw_tos(struct sk_buff *skb,
 	return skb;
 }
 
-static struct sk_buff *set_tp_port(struct sk_buff *skb,
-				   const struct sw_flow_key *key,
-				   const struct nlattr *a)
+static struct sk_buff *set_tp_port(struct sk_buff *skb, const struct nlattr *a)
 {
 	struct udphdr *th;
 	__sum16 *check;
 	__be16 *port;
 
-	if (unlikely(!is_ip(skb, key)))
+	if (unlikely(!is_ip(skb)))
 		return skb;
 
 	skb = make_writable(skb, 0);
@@ -201,7 +196,7 @@ static struct sk_buff *set_tp_port(struct sk_buff *skb,
 		return NULL;
 
 	/* Must follow make_writable() since that can move the skb data. */
-	check = get_l4_checksum(skb, key);
+	check = get_l4_checksum(skb);
 	if (unlikely(!check))
 		return skb;
 
@@ -226,17 +221,16 @@ static struct sk_buff *set_tp_port(struct sk_buff *skb,
  *
  * @skb: skbuff containing an Ethernet packet, with network header pointing
  * just past the Ethernet and optional 802.1Q header.
- * @key: flow key extracted from @skb by flow_extract()
  *
  * Returns true if @skb is an invalid Ethernet+IPv4 ARP packet: one with screwy
  * or truncated header fields or one whose inner and outer Ethernet address
  * differ.
  */
-static bool is_spoofed_arp(struct sk_buff *skb, const struct sw_flow_key *key)
+static bool is_spoofed_arp(struct sk_buff *skb)
 {
 	struct arp_eth_header *arp;
 
-	if (key->dl_type != htons(ETH_P_ARP))
+	if (OVS_CB(skb)->flow->key.dl_type != htons(ETH_P_ARP))
 		return false;
 
 	if (skb_network_offset(skb) + sizeof(struct arp_eth_header) > skb->len)
@@ -268,8 +262,7 @@ error:
 	kfree_skb(skb);
 }
 
-static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg,
-			  const struct sw_flow_key *key)
+static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg)
 {
 	struct dp_upcall_info upcall;
 
@@ -278,7 +271,7 @@ static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg,
 		return -ENOMEM;
 
 	upcall.cmd = ODP_PACKET_CMD_ACTION;
-	upcall.key = key;
+	upcall.key = &OVS_CB(skb)->flow->key;
 	upcall.userdata = arg;
 	upcall.sample_pool = 0;
 	upcall.actions = NULL;
@@ -288,8 +281,7 @@ static int output_control(struct datapath *dp, struct sk_buff *skb, u64 arg,
 
 /* Execute a list of actions against 'skb'. */
 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
-			      const struct sw_flow_key *key,
-			      const struct nlattr *actions, u32 actions_len)
+			      struct sw_flow_actions *acts)
 {
 	/* Every output action needs a separate clone of 'skb', but the common
 	 * case is just a single output action, so that doing a clone and
@@ -300,7 +292,8 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 	const struct nlattr *a;
 	int rem, err;
 
-	for (a = actions, rem = actions_len; rem > 0; a = nla_next(a, &rem)) {
+	for (a = acts->actions, rem = acts->actions_len; rem > 0;
+	     a = nla_next(a, &rem)) {
 		if (prev_port != -1) {
 			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
 			prev_port = -1;
@@ -312,7 +305,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case ODP_ACTION_ATTR_CONTROLLER:
-			err = output_control(dp, skb, nla_get_u64(a), key);
+			err = output_control(dp, skb, nla_get_u64(a));
 			if (err) {
 				kfree_skb(skb);
 				return err;
@@ -347,16 +340,16 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 
 		case ODP_ACTION_ATTR_SET_NW_SRC:
 		case ODP_ACTION_ATTR_SET_NW_DST:
-			skb = set_nw_addr(skb, key, a);
+			skb = set_nw_addr(skb, a);
 			break;
 
 		case ODP_ACTION_ATTR_SET_NW_TOS:
-			skb = set_nw_tos(skb, key, nla_get_u8(a));
+			skb = set_nw_tos(skb, nla_get_u8(a));
 			break;
 
 		case ODP_ACTION_ATTR_SET_TP_SRC:
 		case ODP_ACTION_ATTR_SET_TP_DST:
-			skb = set_tp_port(skb, key, a);
+			skb = set_tp_port(skb, a);
 			break;
 
 		case ODP_ACTION_ATTR_SET_PRIORITY:
@@ -368,7 +361,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
-			if (unlikely(is_spoofed_arp(skb, key)))
+			if (unlikely(is_spoofed_arp(skb)))
 				goto exit;
 			break;
 		}
@@ -384,8 +377,7 @@ exit:
 }
 
 static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
-			 const struct sw_flow_key *key,
-			 const struct nlattr *a, u32 actions_len)
+			 struct sw_flow_actions *acts)
 {
 	struct sk_buff *nskb;
 	struct vport *p = OVS_CB(skb)->vport;
@@ -403,23 +395,46 @@ static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
 		return;
 
 	upcall.cmd = ODP_PACKET_CMD_SAMPLE;
-	upcall.key = key;
+	upcall.key = &OVS_CB(skb)->flow->key;
 	upcall.userdata = 0;
 	upcall.sample_pool = atomic_read(&p->sflow_pool);
-	upcall.actions = a;
-	upcall.actions_len = actions_len;
+	upcall.actions = acts->actions;
+	upcall.actions_len = acts->actions_len;
 	dp_upcall(dp, nskb, &upcall);
 }
 
 /* Execute a list of actions against 'skb'. */
-int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct sw_flow_key *key,
-		    const struct nlattr *actions, u32 actions_len)
+int execute_actions(struct datapath *dp, struct sk_buff *skb)
 {
+	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+	struct loop_counter *loop;
+	int error;
+
+	/* Check whether we've looped too much. */
+	loop = loop_get_counter();
+	if (unlikely(++loop->count > MAX_LOOPS))
+		loop->looping = true;
+	if (unlikely(loop->looping)) {
+		error = loop_suppress(dp, acts);
+		kfree_skb(skb);
+		goto out_loop;
+	}
+
+	/* Really execute actions. */
 	if (dp->sflow_probability)
-		sflow_sample(dp, skb, key, actions, actions_len);
-
+		sflow_sample(dp, skb, acts);
 	OVS_CB(skb)->tun_id = 0;
+	error = do_execute_actions(dp, skb, acts);
 
-	return do_execute_actions(dp, skb, key, actions, actions_len);
+	/* Check whether sub-actions looped too much. */
+	if (unlikely(loop->looping))
+		error = loop_suppress(dp, acts);
+
+out_loop:
+	/* Decrement loop counter. */
+	if (!--loop->count)
+		loop->looping = false;
+	loop_put_counter();
+
+	return error;
 }
diff --git a/datapath/actions.h b/datapath/actions.h
index 3348ae2c2..a832295ce 100644
--- a/datapath/actions.h
+++ b/datapath/actions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -16,9 +16,7 @@ struct datapath;
 struct sk_buff;
 struct sw_flow_key;
 
-int execute_actions(struct datapath *dp, struct sk_buff *skb,
-		    const struct sw_flow_key *,
-		    const struct nlattr *, u32 actions_len);
+int execute_actions(struct datapath *dp, struct sk_buff *skb);
 
 static inline void skb_clear_rxhash(struct sk_buff *skb)
 {
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 46dc7379c..05875dc18 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -49,7 +49,6 @@
 #include "datapath.h"
 #include "actions.h"
 #include "flow.h"
-#include "loop_counter.h"
 #include "table.h"
 #include "vlan.h"
 #include "vport-internal_dev.h"
@@ -267,8 +266,6 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 	struct datapath *dp = p->dp;
 	struct dp_stats_percpu *stats;
 	int stats_counter_off;
-	struct sw_flow_actions *acts;
-	struct loop_counter *loop;
 	int error;
 
 	OVS_CB(skb)->vport = p;
@@ -313,32 +310,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 
 	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
 	flow_used(OVS_CB(skb)->flow, skb);
-
-	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
-
-	/* Check whether we've looped too much. */
-	loop = loop_get_counter();
-	if (unlikely(++loop->count > MAX_LOOPS))
-		loop->looping = true;
-	if (unlikely(loop->looping)) {
-		loop_suppress(dp, acts);
-		kfree_skb(skb);
-		goto out_loop;
-	}
-
-	/* Execute actions. */
-	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
-			acts->actions_len);
-
-	/* Check whether sub-actions looped too much. */
-	if (unlikely(loop->looping))
-		loop_suppress(dp, acts);
-
-out_loop:
-	/* Decrement loop counter. */
-	if (!--loop->count)
-		loop->looping = false;
-	loop_put_counter();
+	execute_actions(dp, skb);
 
 out:
 	/* Update datapath statistics. */
@@ -739,9 +711,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	err = -ENODEV;
 	if (!dp)
 		goto err_unlock;
-	err = execute_actions(dp, packet, &flow->key,
-			      nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
-			      nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
+	err = execute_actions(dp, packet);
 	rcu_read_unlock();
 
 	flow_put(flow);
diff --git a/datapath/loop_counter.c b/datapath/loop_counter.c
index 491305d97..3e1d890aa 100644
--- a/datapath/loop_counter.c
+++ b/datapath/loop_counter.c
@@ -1,6 +1,6 @@
 /*
  * Distributed under the terms of the GNU GPL version 2.
- * Copyright (c) 2010 Nicira Networks.
+ * Copyright (c) 2010, 2011 Nicira Networks.
  *
  * Significant portions of this file may be copied from parts of the Linux
  * kernel, by Linus Torvalds and others.
@@ -15,12 +15,13 @@
 
 #include "loop_counter.h"
 
-void loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
+int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
 {
 	if (net_ratelimit())
 		pr_warn("%s: flow looped %d times, dropping\n",
 			dp_name(dp), MAX_LOOPS);
 	actions->actions_len = 0;
+	return -ELOOP;
 }
 
 #ifndef CONFIG_PREEMPT_RT
diff --git a/datapath/loop_counter.h b/datapath/loop_counter.h
index e08afb1d2..0d1fdf960 100644
--- a/datapath/loop_counter.h
+++ b/datapath/loop_counter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 Nicira Networks.
+ * Copyright (c) 2010, 2011 Nicira Networks.
  * Distributed under the terms of the GNU GPL version 2.
  *
  * Significant portions of this file may be copied from parts of the Linux
@@ -23,6 +23,6 @@ struct loop_counter {
 
 struct loop_counter *loop_get_counter(void);
 void loop_put_counter(void);
-void loop_suppress(struct datapath *, struct sw_flow_actions *);
+int loop_suppress(struct datapath *, struct sw_flow_actions *);
 
 #endif /* loop_counter.h */

From 0ee17a3ba726ec23cc49f2c40da4b084084dbdb7 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Thu, 28 Apr 2011 17:13:50 -0700
Subject: [PATCH 16/40] datapath: Remove dead code in queue_control_packets().

Fixes the following warning:

datapath.c:473:6: warning: variable 'port_no' set but not used
[-Wunused-but-set-variable]

Signed-off-by: Ethan Jackson <ethan@nicira.com>
---
 datapath/datapath.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 05875dc18..fc00d7878 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -442,14 +442,8 @@ static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
 {
 	u32 group = packet_mc_group(dp, upcall_info->cmd);
 	struct sk_buff *nskb;
-	int port_no;
 	int err;
 
-	if (OVS_CB(skb)->vport)
-		port_no = OVS_CB(skb)->vport->port_no;
-	else
-		port_no = ODPP_LOCAL;
-
 	do {
 		struct odp_header *upcall;
 		struct sk_buff *user_skb; /* to be queued to userspace */

From 8522b383863cbc8f0d530fc7d44c0643dcac541d Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 29 Apr 2011 13:12:19 -0700
Subject: [PATCH 17/40] dpif-linux: Recycle leaked ports.

When ports are deleted from the datapath they need to be added to
an LRU list maintained in dpif-linux so they may be reallocated.
When using vswitchd to delete the ports this happens automatically.
However, if a port is deleted directly from the datapath it is
never reclaimed by dpif-linux.  If this happens often, eventually
no ports will be available for allocation and dpif-linux will fall
back to using the old, kernel implemented, allocation strategy.

This commit fixes the problem by automatically reclaiming ports
missing from the datapath whenever the list of ports in the
datapath is dumped.

Bug #2140.
---
 lib/dpif-linux.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index fa8eea6e7..7b3670910 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -472,6 +472,8 @@ dpif_linux_flow_flush(struct dpif *dpif_)
 
 struct dpif_linux_port_state {
     struct nl_dump dump;
+    unsigned long *port_bitmap; /* Ports in the datapath. */
+    bool complete;              /* Dump completed without error. */
 };
 
 static int
@@ -483,6 +485,8 @@ dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep)
     struct ofpbuf *buf;
 
     *statep = state = xmalloc(sizeof *state);
+    state->port_bitmap = bitmap_allocate(LRU_MAX_PORTS);
+    state->complete = false;
 
     dpif_linux_vport_init(&request);
     request.cmd = ODP_DP_CMD_GET;
@@ -506,6 +510,7 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
     int error;
 
     if (!nl_dump_next(&state->dump, &buf)) {
+        state->complete = true;
         return EOF;
     }
 
@@ -514,6 +519,10 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
         return error;
     }
 
+    if (vport.port_no < LRU_MAX_PORTS) {
+        bitmap_set1(state->port_bitmap, vport.port_no);
+    }
+
     dpif_port->name = (char *) vport.name;
     dpif_port->type = (char *) netdev_vport_get_netdev_type(&vport);
     dpif_port->port_no = vport.port_no;
@@ -521,10 +530,23 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
 }
 
 static int
-dpif_linux_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
+dpif_linux_port_dump_done(const struct dpif *dpif_, void *state_)
 {
+    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     struct dpif_linux_port_state *state = state_;
     int error = nl_dump_done(&state->dump);
+
+    if (state->complete) {
+        uint16_t i;
+
+        for (i = 0; i < LRU_MAX_PORTS; i++) {
+            if (!bitmap_is_set(state->port_bitmap, i)) {
+                dpif_linux_push_port(dpif, i);
+            }
+        }
+    }
+
+    free(state->port_bitmap);
     free(state);
     return error;
 }

From 66409d1bccbdddd8833f74876a1e7ef250034d4e Mon Sep 17 00:00:00 2001
From: Andrew Evans <aevans@nicira.com>
Date: Fri, 29 Apr 2011 17:05:58 -0700
Subject: [PATCH 18/40] tunneling: Add df_default and df_inherit tunnel
 options.

Split existing pmtud tunnel option's functionality into three. Existing pmtud
option still exists, but now governs only whether datapath sends ICMP frag
needed messages. New df_inherit option controls whether DF bit is copied from
packet inner header to outer tunnel header. New df_default option controls
whether DF bit is set if inner packet isn't IP or if df_inherit is disabled.

Suggested-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Andrew Evans <aevans@nicira.com>
Feature #5456.
---
 datapath/tunnel.c            | 14 ++++----
 include/openvswitch/tunnel.h |  8 +++--
 lib/netdev-vport.c           | 16 ++++++++-
 vswitchd/vswitch.xml         | 66 ++++++++++++++++++++++++++++--------
 4 files changed, 79 insertions(+), 25 deletions(-)

diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index 899d1cdcb..70a4cd7ca 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -725,8 +725,9 @@ static bool check_mtu(struct sk_buff *skb,
 		      const struct tnl_mutable_config *mutable,
 		      const struct rtable *rt, __be16 *frag_offp)
 {
+	bool df_inherit = mutable->flags & TNL_F_DF_INHERIT;
 	bool pmtud = mutable->flags & TNL_F_PMTUD;
-	__be16 frag_off = 0;
+	__be16 frag_off = mutable->flags & TNL_F_DF_DEFAULT ? htons(IP_DF) : 0;
 	int mtu = 0;
 	unsigned int packet_length = skb->len - ETH_HLEN;
 
@@ -738,8 +739,6 @@ static bool check_mtu(struct sk_buff *skb,
 	if (pmtud) {
 		int vlan_header = 0;
 
-		frag_off = htons(IP_DF);
-
 		/* The tag needs to go in packet regardless of where it
 		 * currently is, so subtract it from the MTU.
 		 */
@@ -756,7 +755,8 @@ static bool check_mtu(struct sk_buff *skb,
 	if (skb->protocol == htons(ETH_P_IP)) {
 		struct iphdr *iph = ip_hdr(skb);
 
-		frag_off |= iph->frag_off & htons(IP_DF);
+		if (df_inherit)
+			frag_off = iph->frag_off & htons(IP_DF);
 
 		if (pmtud && iph->frag_off & htons(IP_DF)) {
 			mtu = max(mtu, IP_MIN_MTU);
@@ -769,8 +769,10 @@ static bool check_mtu(struct sk_buff *skb,
 	}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
-		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
-		if (packet_length > IPV6_MIN_MTU)
+		/* IPv6 requires end hosts to do fragmentation
+		 * if the packet is above the minimum MTU.
+		 */
+		if (df_inherit && packet_length > IPV6_MIN_MTU)
 			frag_off = htons(IP_DF);
 
 		if (pmtud) {
diff --git a/include/openvswitch/tunnel.h b/include/openvswitch/tunnel.h
index a394f8a8d..5e2e91644 100644
--- a/include/openvswitch/tunnel.h
+++ b/include/openvswitch/tunnel.h
@@ -65,8 +65,10 @@ enum {
 #define TNL_F_CSUM		(1 << 0) /* Checksum packets. */
 #define TNL_F_TOS_INHERIT	(1 << 1) /* Inherit the ToS from the inner packet. */
 #define TNL_F_TTL_INHERIT	(1 << 2) /* Inherit the TTL from the inner packet. */
-#define TNL_F_PMTUD		(1 << 3) /* Enable path MTU discovery. */
-#define TNL_F_HDR_CACHE		(1 << 4) /* Enable tunnel header caching. */
-#define TNL_F_IPSEC		(1 << 5) /* Traffic is IPsec encrypted. */
+#define TNL_F_DF_INHERIT	(1 << 3) /* Inherit the DF bit from the inner packet. */
+#define TNL_F_DF_DEFAULT	(1 << 4) /* Set the DF bit if inherit off or not IP. */
+#define TNL_F_PMTUD		(1 << 5) /* Enable path MTU discovery. */
+#define TNL_F_HDR_CACHE		(1 << 6) /* Enable tunnel header caching. */
+#define TNL_F_IPSEC		(1 << 7) /* Traffic is IPsec encrypted. */
 
 #endif /* openvswitch/tunnel.h */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index be2694159..f31f85fd1 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -667,7 +667,7 @@ parse_tunnel_config(const char *name, const char *type,
     ovs_be32 daddr = htonl(0);
     uint32_t flags;
 
-    flags = TNL_F_PMTUD | TNL_F_HDR_CACHE;
+    flags = TNL_F_DF_DEFAULT | TNL_F_PMTUD | TNL_F_HDR_CACHE;
     if (!strcmp(type, "gre")) {
         is_gre = true;
     } else if (!strcmp(type, "ipsec_gre")) {
@@ -709,6 +709,14 @@ parse_tunnel_config(const char *name, const char *type,
             if (!strcmp(node->data, "true")) {
                 flags |= TNL_F_CSUM;
             }
+        } else if (!strcmp(node->name, "df_inherit")) {
+            if (!strcmp(node->data, "true")) {
+                flags |= TNL_F_DF_INHERIT;
+            }
+        } else if (!strcmp(node->name, "df_default")) {
+            if (!strcmp(node->data, "false")) {
+                flags &= ~TNL_F_DF_DEFAULT;
+            }
         } else if (!strcmp(node->name, "pmtud")) {
             if (!strcmp(node->data, "false")) {
                 flags &= ~TNL_F_PMTUD;
@@ -891,6 +899,12 @@ unparse_tunnel_config(const char *name OVS_UNUSED, const char *type OVS_UNUSED,
     if (flags & TNL_F_CSUM) {
         smap_add(args, "csum", "true");
     }
+    if (flags & TNL_F_DF_INHERIT) {
+        smap_add(args, "df_inherit", "true");
+    }
+    if (!(flags & TNL_F_DF_DEFAULT)) {
+        smap_add(args, "df_default", "false");
+    }
     if (!(flags & TNL_F_PMTUD)) {
         smap_add(args, "pmtud", "false");
     }
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 48315fa9c..1db89dc5b 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -830,19 +830,31 @@
                 adds value for the GRE and encapsulated Ethernet headers.
                 Default is disabled, set to <code>true</code> to enable.</dd>
             </dl>
+            <dl>
+              <dt><code>df_inherit</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be copied
+                from the inner IP headers (those of the encapsulated traffic)
+                to the outer (tunnel) headers.  Default is disabled; set to
+                <code>true</code> to enable.</dd>
+            </dl>
+            <dl>
+              <dt><code>df_default</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be set by
+                default on tunnel headers if the <code>df_inherit</code> option
+                is not set, or if the encapsulated packet is not IP.  Default
+                is enabled; set to <code>false</code> to disable.</dd>
+            </dl>
             <dl>
               <dt><code>pmtud</code></dt>
               <dd>Optional.  Enable tunnel path MTU discovery.  If enabled
-                ``ICMP destination unreachable - fragmentation'' needed
+                ``ICMP Destination Unreachable - Fragmentation Needed''
                 messages will be generated for IPv4 packets with the DF bit set
                 and IPv6 packets above the minimum MTU if the packet size
-                exceeds the path MTU minus the size of the tunnel headers.  It
-                also forces the encapsulating packet DF bit to be set (it is
-                always set if the inner packet implies path MTU discovery).
+                exceeds the path MTU minus the size of the tunnel headers.
                 Note that this option causes behavior that is typically
                 reserved for routers and therefore is not entirely in
                 compliance with the IEEE 802.1D specification for bridges.
-                Default is enabled, set to <code>false</code> to disable.</dd>
+                Default is enabled; set to <code>false</code> to disable.</dd>
             </dl>
             <dl>
               <dt><code>header_cache</code></dt>
@@ -956,19 +968,31 @@
                 adds value for the GRE and encapsulated Ethernet headers.
                 Default is disabled, set to <code>true</code> to enable.</dd>
             </dl>
+            <dl>
+              <dt><code>df_inherit</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be copied
+                from the inner IP headers (those of the encapsulated traffic)
+                to the outer (tunnel) headers.  Default is disabled; set to
+                <code>true</code> to enable.</dd>
+            </dl>
+            <dl>
+              <dt><code>df_default</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be set by
+                default on tunnel headers if the <code>df_inherit</code> option
+                is not set, or if the encapsulated packet is not IP.  Default
+                is enabled; set to <code>false</code> to disable.</dd>
+            </dl>
             <dl>
               <dt><code>pmtud</code></dt>
               <dd>Optional.  Enable tunnel path MTU discovery.  If enabled
-                ``ICMP destination unreachable - fragmentation'' needed
+                ``ICMP Destination Unreachable - Fragmentation Needed''
                 messages will be generated for IPv4 packets with the DF bit set
                 and IPv6 packets above the minimum MTU if the packet size
-                exceeds the path MTU minus the size of the tunnel headers.  It
-                also forces the encapsulating packet DF bit to be set (it is
-                always set if the inner packet implies path MTU discovery).
+                exceeds the path MTU minus the size of the tunnel headers.
                 Note that this option causes behavior that is typically
                 reserved for routers and therefore is not entirely in
                 compliance with the IEEE 802.1D specification for bridges.
-                Default is enabled, set to <code>false</code> to disable.</dd>
+                Default is enabled; set to <code>false</code> to disable.</dd>
             </dl>
           </dd>
           <dt><code>capwap</code></dt>
@@ -1011,19 +1035,31 @@
                 (otherwise it will be the system default, typically 64).
                 Default is the system default TTL.</dd>
             </dl>
+            <dl>
+              <dt><code>df_inherit</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be copied
+                from the inner IP headers (those of the encapsulated traffic)
+                to the outer (tunnel) headers.  Default is disabled; set to
+                <code>true</code> to enable.</dd>
+            </dl>
+            <dl>
+              <dt><code>df_default</code></dt>
+              <dd>Optional.  If enabled, the Don't Fragment bit will be set by
+                default on tunnel headers if the <code>df_inherit</code> option
+                is not set, or if the encapsulated packet is not IP.  Default
+                is enabled; set to <code>false</code> to disable.</dd>
+            </dl>
             <dl>
               <dt><code>pmtud</code></dt>
               <dd>Optional.  Enable tunnel path MTU discovery.  If enabled
-                ``ICMP destination unreachable - fragmentation'' needed
+                ``ICMP Destination Unreachable - Fragmentation Needed''
                 messages will be generated for IPv4 packets with the DF bit set
                 and IPv6 packets above the minimum MTU if the packet size
-                exceeds the path MTU minus the size of the tunnel headers.  It
-                also forces the encapsulating packet DF bit to be set (it is
-                always set if the inner packet implies path MTU discovery).
+                exceeds the path MTU minus the size of the tunnel headers.
                 Note that this option causes behavior that is typically
                 reserved for routers and therefore is not entirely in
                 compliance with the IEEE 802.1D specification for bridges.
-                Default is enabled, set to <code>false</code> to disable.</dd>
+                Default is enabled; set to <code>false</code> to disable.</dd>
             </dl>
             <dl>
               <dt><code>header_cache</code></dt>

From dc432c2e8159a406e0e45fae4ff49532191b9129 Mon Sep 17 00:00:00 2001
From: Andrew Evans <aevans@nicira.com>
Date: Sun, 1 May 2011 10:18:45 -0700
Subject: [PATCH 19/40] tunneling: Add DF inherit and default flags to set of
 public tunnel flags.

Signed-off-by: Andrew Evans <aevans@nicira.com>
Acked-by: Jesse Gross <jesse@nicira.com>
---
 datapath/tunnel.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datapath/tunnel.h b/datapath/tunnel.h
index 0fd6a69f1..784cda1a9 100644
--- a/datapath/tunnel.h
+++ b/datapath/tunnel.h
@@ -42,7 +42,8 @@
 
 /* All public tunnel flags. */
 #define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \
-		      TNL_F_PMTUD | TNL_F_HDR_CACHE | TNL_F_IPSEC)
+		      TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
+		      TNL_F_HDR_CACHE | TNL_F_IPSEC)
 
 /**
  * struct tnl_mutable_config - modifiable configuration for a tunnel.

From d39808227b8a8e794a7cb0b990f4fcb0f5daadf5 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 28 Apr 2011 11:13:53 -0700
Subject: [PATCH 20/40] netdev-linux: New functions for converting netdev stats
 formats.

An upcoming commit will introduce another function that needs to convert
between rtnl_link_stats64 and netdev_stats, so it seemed best to just add
functions to do the conversion.
---
 lib/automake.mk    |  1 +
 lib/netdev-linux.c | 75 ++++++++++++++++++++++++++++++++--------------
 lib/netdev-linux.h | 34 +++++++++++++++++++++
 lib/netdev-vport.c | 45 ++--------------------------
 4 files changed, 90 insertions(+), 65 deletions(-)
 create mode 100644 lib/netdev-linux.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 802cc9920..7c1977f80 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -196,6 +196,7 @@ lib_libopenvswitch_a_SOURCES += \
 	lib/dpif-linux.c \
 	lib/dpif-linux.h \
 	lib/netdev-linux.c \
+	lib/netdev-linux.h \
 	lib/netdev-vport.c \
 	lib/netdev-vport.h \
 	lib/netlink-protocol.h \
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e2af6594f..384fdafee 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -15,6 +15,9 @@
  */
 
 #include <config.h>
+
+#include "netdev-linux.h"
+
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -3925,7 +3928,55 @@ tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes)
     unsigned int min_burst = tc_buffer_per_jiffy(Bps) + mtu;
     return tc_bytes_to_ticks(Bps, MAX(burst_bytes, min_burst));
 }
+
+/* Public utility functions. */
 
+#define COPY_NETDEV_STATS                                   \
+    dst->rx_packets = src->rx_packets;                      \
+    dst->tx_packets = src->tx_packets;                      \
+    dst->rx_bytes = src->rx_bytes;                          \
+    dst->tx_bytes = src->tx_bytes;                          \
+    dst->rx_errors = src->rx_errors;                        \
+    dst->tx_errors = src->tx_errors;                        \
+    dst->rx_dropped = src->rx_dropped;                      \
+    dst->tx_dropped = src->tx_dropped;                      \
+    dst->multicast = src->multicast;                        \
+    dst->collisions = src->collisions;                      \
+    dst->rx_length_errors = src->rx_length_errors;          \
+    dst->rx_over_errors = src->rx_over_errors;              \
+    dst->rx_crc_errors = src->rx_crc_errors;                \
+    dst->rx_frame_errors = src->rx_frame_errors;            \
+    dst->rx_fifo_errors = src->rx_fifo_errors;              \
+    dst->rx_missed_errors = src->rx_missed_errors;          \
+    dst->tx_aborted_errors = src->tx_aborted_errors;        \
+    dst->tx_carrier_errors = src->tx_carrier_errors;        \
+    dst->tx_fifo_errors = src->tx_fifo_errors;              \
+    dst->tx_heartbeat_errors = src->tx_heartbeat_errors;    \
+    dst->tx_window_errors = src->tx_window_errors
+
+/* Copies 'src' into 'dst', performing format conversion in the process. */
+void
+netdev_stats_from_rtnl_link_stats(struct netdev_stats *dst,
+                                  const struct rtnl_link_stats *src)
+{
+    COPY_NETDEV_STATS;
+}
+
+/* Copies 'src' into 'dst', performing format conversion in the process. */
+void
+netdev_stats_from_rtnl_link_stats64(struct netdev_stats *dst,
+                                    const struct rtnl_link_stats64 *src)
+{
+    COPY_NETDEV_STATS;
+}
+
+/* Copies 'src' into 'dst', performing format conversion in the process. */
+void
+netdev_stats_to_rtnl_link_stats64(struct rtnl_link_stats64 *dst,
+                                  const struct netdev_stats *src)
+{
+    COPY_NETDEV_STATS;
+}
 
 /* Utility functions. */
 
@@ -3945,7 +3996,6 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
     struct ofpbuf request;
     struct ofpbuf *reply;
     struct ifinfomsg *ifi;
-    const struct rtnl_link_stats *rtnl_stats;
     struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)];
     int error;
 
@@ -3973,28 +4023,7 @@ get_stats_via_netlink(int ifindex, struct netdev_stats *stats)
         return EPROTO;
     }
 
-    rtnl_stats = nl_attr_get(attrs[IFLA_STATS]);
-    stats->rx_packets = rtnl_stats->rx_packets;
-    stats->tx_packets = rtnl_stats->tx_packets;
-    stats->rx_bytes = rtnl_stats->rx_bytes;
-    stats->tx_bytes = rtnl_stats->tx_bytes;
-    stats->rx_errors = rtnl_stats->rx_errors;
-    stats->tx_errors = rtnl_stats->tx_errors;
-    stats->rx_dropped = rtnl_stats->rx_dropped;
-    stats->tx_dropped = rtnl_stats->tx_dropped;
-    stats->multicast = rtnl_stats->multicast;
-    stats->collisions = rtnl_stats->collisions;
-    stats->rx_length_errors = rtnl_stats->rx_length_errors;
-    stats->rx_over_errors = rtnl_stats->rx_over_errors;
-    stats->rx_crc_errors = rtnl_stats->rx_crc_errors;
-    stats->rx_frame_errors = rtnl_stats->rx_frame_errors;
-    stats->rx_fifo_errors = rtnl_stats->rx_fifo_errors;
-    stats->rx_missed_errors = rtnl_stats->rx_missed_errors;
-    stats->tx_aborted_errors = rtnl_stats->tx_aborted_errors;
-    stats->tx_carrier_errors = rtnl_stats->tx_carrier_errors;
-    stats->tx_fifo_errors = rtnl_stats->tx_fifo_errors;
-    stats->tx_heartbeat_errors = rtnl_stats->tx_heartbeat_errors;
-    stats->tx_window_errors = rtnl_stats->tx_window_errors;
+    netdev_stats_from_rtnl_link_stats(stats, nl_attr_get(attrs[IFLA_STATS]));
 
     ofpbuf_delete(reply);
 
diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h
new file mode 100644
index 000000000..7a112049b
--- /dev/null
+++ b/lib/netdev-linux.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NETDEV_LINUX_H
+#define NETDEV_LINUX_H 1
+
+/* These functions are Linux specific, so they should be used directly only by
+ * Linux-specific code. */
+
+struct netdev_stats;
+struct rtnl_link_stats;
+struct rtnl_link_stats64;
+
+void netdev_stats_from_rtnl_link_stats(struct netdev_stats *dst,
+                                       const struct rtnl_link_stats *src);
+void netdev_stats_from_rtnl_link_stats64(struct netdev_stats *dst,
+                                         const struct rtnl_link_stats64 *src);
+void netdev_stats_to_rtnl_link_stats64(struct rtnl_link_stats64 *dst,
+                                       const struct netdev_stats *src);
+
+#endif /* netdev-linux.h */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index f31f85fd1..e11cb2a8d 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -32,6 +32,7 @@
 #include "hash.h"
 #include "hmap.h"
 #include "list.h"
+#include "netdev-linux.h"
 #include "netdev-provider.h"
 #include "netlink.h"
 #include "netlink-socket.h"
@@ -416,27 +417,7 @@ netdev_vport_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
         return EOPNOTSUPP;
     }
 
-    stats->rx_packets = reply.stats->rx_packets;
-    stats->tx_packets = reply.stats->tx_packets;
-    stats->rx_bytes = reply.stats->rx_bytes;
-    stats->tx_bytes = reply.stats->tx_bytes;
-    stats->rx_errors = reply.stats->rx_errors;
-    stats->tx_errors = reply.stats->tx_errors;
-    stats->rx_dropped = reply.stats->rx_dropped;
-    stats->tx_dropped = reply.stats->tx_dropped;
-    stats->multicast = reply.stats->multicast;
-    stats->collisions = reply.stats->collisions;
-    stats->rx_length_errors = reply.stats->rx_length_errors;
-    stats->rx_over_errors = reply.stats->rx_over_errors;
-    stats->rx_crc_errors = reply.stats->rx_crc_errors;
-    stats->rx_frame_errors = reply.stats->rx_frame_errors;
-    stats->rx_fifo_errors = reply.stats->rx_fifo_errors;
-    stats->rx_missed_errors = reply.stats->rx_missed_errors;
-    stats->tx_aborted_errors = reply.stats->tx_aborted_errors;
-    stats->tx_carrier_errors = reply.stats->tx_carrier_errors;
-    stats->tx_fifo_errors = reply.stats->tx_fifo_errors;
-    stats->tx_heartbeat_errors = reply.stats->tx_heartbeat_errors;
-    stats->tx_window_errors = reply.stats->tx_window_errors;
+    netdev_stats_from_rtnl_link_stats64(stats, reply.stats);
 
     ofpbuf_delete(buf);
 
@@ -450,27 +431,7 @@ netdev_vport_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
     struct dpif_linux_vport vport;
     int err;
 
-    rtnl_stats.rx_packets = stats->rx_packets;
-    rtnl_stats.tx_packets = stats->tx_packets;
-    rtnl_stats.rx_bytes = stats->rx_bytes;
-    rtnl_stats.tx_bytes = stats->tx_bytes;
-    rtnl_stats.rx_errors = stats->rx_errors;
-    rtnl_stats.tx_errors = stats->tx_errors;
-    rtnl_stats.rx_dropped = stats->rx_dropped;
-    rtnl_stats.tx_dropped = stats->tx_dropped;
-    rtnl_stats.multicast = stats->multicast;
-    rtnl_stats.collisions = stats->collisions;
-    rtnl_stats.rx_length_errors = stats->rx_length_errors;
-    rtnl_stats.rx_over_errors = stats->rx_over_errors;
-    rtnl_stats.rx_crc_errors = stats->rx_crc_errors;
-    rtnl_stats.rx_frame_errors = stats->rx_frame_errors;
-    rtnl_stats.rx_fifo_errors = stats->rx_fifo_errors;
-    rtnl_stats.rx_missed_errors = stats->rx_missed_errors;
-    rtnl_stats.tx_aborted_errors = stats->tx_aborted_errors;
-    rtnl_stats.tx_carrier_errors = stats->tx_carrier_errors;
-    rtnl_stats.tx_fifo_errors = stats->tx_fifo_errors;
-    rtnl_stats.tx_heartbeat_errors = stats->tx_heartbeat_errors;
-    rtnl_stats.tx_window_errors = stats->tx_window_errors;
+    netdev_stats_to_rtnl_link_stats64(&rtnl_stats, stats);
 
     dpif_linux_vport_init(&vport);
     vport.cmd = ODP_VPORT_CMD_SET;

From 032aa6a3543621213f133aff3e31021dfd4bef43 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 28 Apr 2011 13:02:15 -0700
Subject: [PATCH 21/40] ovs-dpctl: Add -s option to print packet and byte
 counters.

---
 ChangeLog                |  2 ++
 lib/dpif-linux.c         | 12 ++++++++
 lib/dpif.c               |  1 +
 lib/dpif.h               |  3 +-
 utilities/ovs-dpctl.8.in |  9 ++++--
 utilities/ovs-dpctl.c    | 62 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 18f922808..bd0c40da1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,7 @@
 post v1.1.0
 ------------------------
+    - The new "-s" option for "ovs-dpctl show" prints packet and byte
+      counters for each port.
     - Feature removals:
       - Dropped support for "tun_id_from_cookie" OpenFlow extension.
 	(Use the extensible match extensions instead.)
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 7b3670910..0aad8671e 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -35,6 +35,7 @@
 #include "bitmap.h"
 #include "dpif-provider.h"
 #include "netdev.h"
+#include "netdev-linux.h"
 #include "netdev-vport.h"
 #include "netlink-socket.h"
 #include "netlink.h"
@@ -431,6 +432,12 @@ dpif_linux_port_query__(const struct dpif *dpif, uint32_t port_no,
         dpif_port->name = xstrdup(reply.name);
         dpif_port->type = xstrdup(netdev_vport_get_netdev_type(&reply));
         dpif_port->port_no = reply.port_no;
+        if (reply.stats) {
+            netdev_stats_from_rtnl_link_stats64(&dpif_port->stats,
+                                                reply.stats);
+        } else {
+            memset(&dpif_port->stats, 0xff, sizeof dpif_port->stats);
+        }
         ofpbuf_delete(buf);
     }
     return error;
@@ -526,6 +533,11 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
     dpif_port->name = (char *) vport.name;
     dpif_port->type = (char *) netdev_vport_get_netdev_type(&vport);
     dpif_port->port_no = vport.port_no;
+    if (vport.stats) {
+        netdev_stats_from_rtnl_link_stats64(&dpif_port->stats, vport.stats);
+    } else {
+        memset(&dpif_port->stats, 0xff, sizeof dpif_port->stats);
+    }
     return 0;
 }
 
diff --git a/lib/dpif.c b/lib/dpif.c
index 81e180fbb..630bcade6 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -472,6 +472,7 @@ dpif_port_clone(struct dpif_port *dst, const struct dpif_port *src)
     dst->name = xstrdup(src->name);
     dst->type = xstrdup(src->type);
     dst->port_no = src->port_no;
+    dst->stats = src->stats;
 }
 
 /* Frees memory allocated to members of 'dpif_port'.
diff --git a/lib/dpif.h b/lib/dpif.h
index 0e0f407c3..a039f1134 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -23,6 +23,7 @@
 #include <stdint.h>
 #include "openflow/openflow.h"
 #include "openvswitch/datapath-protocol.h"
+#include "netdev.h"
 #include "util.h"
 
 #ifdef  __cplusplus
@@ -31,7 +32,6 @@ extern "C" {
 
 struct dpif;
 struct ds;
-struct netdev;
 struct nlattr;
 struct ofpbuf;
 struct sset;
@@ -71,6 +71,7 @@ struct dpif_port {
     char *name;                 /* Network device name, e.g. "eth0". */
     char *type;                 /* Network device type, e.g. "system". */
     uint32_t port_no;           /* Port number within datapath. */
+    struct netdev_stats stats;  /* Port statistics. */
 };
 void dpif_port_clone(struct dpif_port *, const struct dpif_port *);
 void dpif_port_destroy(struct dpif_port *);
diff --git a/utilities/ovs-dpctl.8.in b/utilities/ovs-dpctl.8.in
index 58b3ea0ea..5b5941bed 100644
--- a/utilities/ovs-dpctl.8.in
+++ b/utilities/ovs-dpctl.8.in
@@ -78,10 +78,12 @@ Removes each \fInetdev\fR from the list of network devices datapath
 Prints the name of each configured datapath on a separate line.
 .
 .TP
-\fBshow \fR[\fIdp\fR...]
+[\fB\-s\fR | \fB\-\-statistics\fR] \fBshow \fR[\fIdp\fR...]
 Prints a summary of configured datapaths, including their datapath
 numbers and a list of ports connected to each datapath.  (The local
-port is identified as port 0.)
+port is identified as port 0.)  If \fB\-s\fR or \fB\-\-statistics\fR
+is specified, then packet and byte counters are also printed for each
+port.
 .IP
 If one or more datapaths are specified, information on only those
 datapaths are displayed.  Otherwise, \fBovs\-dpctl\fR displays information
@@ -104,6 +106,9 @@ not OpenFlow flow entries.  By deleting them, the process that set them
 up may be confused about their disappearance.
 .
 .SH OPTIONS
+.IP "\fB\-s\fR, \fB\-\-statistics\fR"
+Causes the \fBshow\fR command to print packet and byte counters for
+each port within the datapaths that it shows.
 .TP
 \fB\-t\fR, \fB\-\-timeout=\fIsecs\fR
 Limits \fBovs\-dpctl\fR runtime to approximately \fIsecs\fR seconds.  If
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 9d6bf9852..40048b445 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -45,6 +45,9 @@
 
 VLOG_DEFINE_THIS_MODULE(dpctl);
 
+/* -s, --statistics: Print port statistics? */
+bool print_statistics;
+
 static const struct command all_commands[];
 
 static void usage(void) NO_RETURN;
@@ -68,6 +71,7 @@ parse_options(int argc, char *argv[])
         VLOG_OPTION_ENUMS
     };
     static struct option long_options[] = {
+        {"statistics", no_argument, 0, 's'},
         {"timeout", required_argument, 0, 't'},
         {"help", no_argument, 0, 'h'},
         {"version", no_argument, 0, 'V'},
@@ -86,6 +90,10 @@ parse_options(int argc, char *argv[])
         }
 
         switch (c) {
+        case 's':
+            print_statistics = true;
+            break;
+
         case 't':
             timeout = strtoul(optarg, NULL, 10);
             if (timeout <= 0) {
@@ -320,6 +328,33 @@ do_del_if(int argc OVS_UNUSED, char *argv[])
     }
 }
 
+static void
+print_stat(const char *leader, uint64_t value)
+{
+    fputs(leader, stdout);
+    if (value != UINT64_MAX) {
+        printf("%"PRIu64, value);
+    } else {
+        putchar('?');
+    }
+}
+
+static void
+print_human_size(uint64_t value)
+{
+    if (value == UINT64_MAX) {
+        /* Nothing to do. */
+    } else if (value >= 1024ULL * 1024 * 1024 * 1024) {
+        printf(" (%.1f TiB)", value / (1024.0 * 1024 * 1024 * 1024));
+    } else if (value >= 1024ULL * 1024 * 1024) {
+        printf(" (%.1f GiB)", value / (1024.0 * 1024 * 1024));
+    } else if (value >= 1024ULL * 1024) {
+        printf(" (%.1f MiB)", value / (1024.0 * 1024));
+    } else if (value >= 1024) {
+        printf(" (%.1f KiB)", value / 1024.0);
+    }
+}
+
 static void
 show_dpif(struct dpif *dpif)
 {
@@ -371,6 +406,33 @@ show_dpif(struct dpif *dpif)
             putchar(')');
         }
         putchar('\n');
+
+        if (print_statistics) {
+            const struct netdev_stats *s = &dpif_port.stats;
+
+            print_stat("\t\tRX packets:", s->rx_packets);
+            print_stat(" errors:", s->rx_errors);
+            print_stat(" dropped:", s->rx_dropped);
+            print_stat(" overruns:", s->rx_over_errors);
+            print_stat(" frame:", s->rx_frame_errors);
+            printf("\n");
+
+            print_stat("\t\tTX packets:", s->tx_packets);
+            print_stat(" errors:", s->tx_errors);
+            print_stat(" dropped:", s->tx_dropped);
+            print_stat(" aborted:", s->tx_aborted_errors);
+            print_stat(" carrier:", s->tx_carrier_errors);
+            printf("\n");
+
+            print_stat("\t\tcollisions:", s->collisions);
+            printf("\n");
+
+            print_stat("\t\tRX bytes:", s->rx_bytes);
+            print_human_size(s->rx_bytes);
+            print_stat("  TX bytes:", s->tx_bytes);
+            print_human_size(s->tx_bytes);
+            printf("\n");
+        }
     }
     dpif_close(dpif);
 }

From 002c42961c2d2cbc8e3ce9458f9bd1ef3e93cfb1 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Mon, 2 May 2011 16:33:01 -0700
Subject: [PATCH 22/40] vswitchd: Update schema version number.

Quite a few changes to LACP and bonding have gone in recently which
allowed additional other_config parameters on ports and interfaces.
These changes should have updated the vswitch.ovsschema version
number.

Requested-by: Jeremy Stribling <strib@nicira.com>
---
 vswitchd/vswitch.ovsschema | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 90fedd98a..c5255dd9e 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "3.3.0",
- "cksum": "1105667635 15276",
+ "version": "3.4.0",
+ "cksum": "3926696578 15276",
  "tables": {
    "Open_vSwitch": {
      "columns": {

From 67416220720fe0121d4019ad72b1e94b6211618f Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 3 May 2011 10:51:06 -0700
Subject: [PATCH 23/40] xenserver: Use .../extra not .../kernel/extra for
 kernel modules.

On XenServer, depmod.conf causes modules in /lib/modules/$(uname -r)/extra
to take priority over standard modules.  Unfortunately, we were installing
our modules in /lib/modules/$(uname -r)/kernel/extra, which isn't special.
This commit fixes the problem.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Reported-by: Bob Ball <bob.ball@citrix.com>
---
 AUTHORS                        | 1 +
 xenserver/openvswitch-xen.spec | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 38ca1422c..ba30f5000 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -44,6 +44,7 @@ Aaron M. Ucko           ucko@debian.org
 Aaron Rosen             arosen@clemson.edu
 Alex Yip                alex@nicira.com
 Alexey I. Froloff       raorn@altlinux.org
+Bob Ball                bob.ball@citrix.com
 Brad Hall               brad@nicira.com
 Brandon Heller          brandonh@stanford.edu
 Bryan Fulton            bryan@nicira.com
diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index c428fc6de..b7b987e32 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -112,8 +112,8 @@ install -m 644 \
         xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \
                $RPM_BUILD_ROOT/usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.py
 
-install -d -m 755 $RPM_BUILD_ROOT/lib/modules/%{xen_version}/kernel/extra/openvswitch
-find datapath/linux-2.6 -name *.ko -exec install -m 755  \{\} $RPM_BUILD_ROOT/lib/modules/%{xen_version}/kernel/extra/openvswitch \;
+install -d -m 755 $RPM_BUILD_ROOT/lib/modules/%{xen_version}/extra/openvswitch
+find datapath/linux-2.6 -name *.ko -exec install -m 755  \{\} $RPM_BUILD_ROOT/lib/modules/%{xen_version}/extra/openvswitch \;
 install xenserver/uuid.py $RPM_BUILD_ROOT/usr/share/openvswitch/python
 
 # Get rid of stuff we don't want to make RPM happy.
@@ -397,5 +397,5 @@ fi
 %exclude /usr/share/openvswitch/python/ovs/db/*.py[co]
 
 %files %{module_package}
-/lib/modules/%{xen_version}/kernel/extra/openvswitch/openvswitch_mod.ko
-%exclude /lib/modules/%{xen_version}/kernel/extra/openvswitch/brcompat_mod.ko
+/lib/modules/%{xen_version}/extra/openvswitch/openvswitch_mod.ko
+%exclude /lib/modules/%{xen_version}/extra/openvswitch/brcompat_mod.ko

From a16a9d0392af597353048419ef614ca2d5ca0afd Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 3 May 2011 10:30:17 -0700
Subject: [PATCH 24/40] ovs-brcompatd: Document bug.

Reported-by: Gregor Schaffrath <grsch@net.t-labs.tu-berlin.de>
---
 AUTHORS                     | 1 +
 vswitchd/ovs-brcompatd.8.in | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index ba30f5000..1aa1cb3c1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -56,6 +56,7 @@ DK Moon                 dkmoon@nicira.com
 Gaetano Catalli         gaetano.catalli@gmail.com
 Ghanem Bahri            bahri.ghanem@gmail.com
 Giuseppe de Candia      giuseppe.decandia@gmail.com
+Gregor Schaffrath       grsch@net.t-labs.tu-berlin.de
 Hassan Khan             hassan.khan@seecs.edu.pk
 Hector Oron             hector.oron@gmail.com
 Henrik Amren            henrik@nicira.com
diff --git a/vswitchd/ovs-brcompatd.8.in b/vswitchd/ovs-brcompatd.8.in
index 0cce84d7a..2ce164c3a 100644
--- a/vswitchd/ovs-brcompatd.8.in
+++ b/vswitchd/ovs-brcompatd.8.in
@@ -52,6 +52,12 @@ pruned.  The default prune timeout is 5 seconds.
 .so lib/common.man
 .so lib/leak-checker.man
 .
+.SH BUGS
+.
+\fBovs\-brcompatd\fR requires the bridges that it manages to initially
+have no ports listed in their database records when it starts up.
+Otherwise, it may add duplicate ports to bridges.
+.
 .SH NOTES
 \fBovs\-brcompatd\fR requires the \fBbrcompat_mod.ko\fR kernel module to be
 loaded.

From 49b2c2d00e1c78427d669cd32520f7fac657611b Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 3 May 2011 11:03:08 -0700
Subject: [PATCH 25/40] xenserver: Don't remove network.dbcache on uninstall.

network.dbcache was introduced by Open vSwitch for its own purposes, but
it has now migrated into the base install of XenServer, which uses it
whether Open vSwitch is installed or not, so we should no longer remove it
on package uninstall.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Reported-by: Bob Ball <bob.ball@citrix.com>
---
 xenserver/openvswitch-xen.spec | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index b7b987e32..97bf99ca0 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -305,7 +305,6 @@ if [ "$1" = "0" ]; then     # $1 = 1 for upgrade
     rm -f /etc/openvswitch/conf.db
     rm -f /etc/sysconfig/openvswitch
     rm -f /etc/openvswitch/vswitchd.cacert
-    rm -f /var/xapi/network.dbcache
 
     # Remove saved XenServer scripts directory, but only if it's empty
     rmdir -p /usr/lib/openvswitch/xs-saved 2>/dev/null

From 2ba7924fd672d1edcaa15d533d394850074ddec4 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 4 May 2011 09:58:30 -0700
Subject: [PATCH 26/40] ovs-tcpundump: Document that ovs-appctl sends
 ofproto/trace command.

Suggested-by: Reid Price <reid@nicira.com>
Bug #5538.
---
 utilities/ovs-tcpundump.1.in | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/utilities/ovs-tcpundump.1.in b/utilities/ovs-tcpundump.1.in
index e59f6dba7..0837dc330 100644
--- a/utilities/ovs-tcpundump.1.in
+++ b/utilities/ovs-tcpundump.1.in
@@ -12,7 +12,8 @@ The \fBovs\-tcpundump\fR program reads \fBtcpdump \-xx\fR output on
 stdin, looking for hexadecimal packet data, and dumps each Ethernet as
 a single hexadecimal string on stdout.  This format is suitable for
 use with the \fBofproto/trace\fR command supported by
-\fBovs\-vswitchd\fR(8) and \fBovs-openflowd\fR(8).
+\fBovs\-vswitchd\fR(8) and \fBovs-openflowd\fR(8)
+via \fBovs\-appctl\fR(8).
 .PP
 At least two \fB\-x\fR or \fB\-X\fR options must be given, otherwise
 the output will omit the Ethernet header, which prevents the output
@@ -23,6 +24,7 @@ from being using with \fBofproto/trace\fR.
 .
 .SH "SEE ALSO"
 .
+.BR ovs\-appctl (8),
 .BR ovs\-vswitchd (8),
 .BR ovs\-openflowd (8),
 .BR ovs\-pcap (1),

From 946350dce6c717af540f2bff68f1012fc8414d86 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 4 May 2011 13:46:21 -0700
Subject: [PATCH 27/40] DESIGN: Move in-band control design discussion here.

It seems more likely that interested users and administrators will be able
to find it here.
---
 DESIGN            | 163 ++++++++++++++++++++++++++++++++++++++++++++++
 ofproto/in-band.c | 160 ---------------------------------------------
 2 files changed, 163 insertions(+), 160 deletions(-)

diff --git a/DESIGN b/DESIGN
index 6e25f01bf..2e3fced80 100644
--- a/DESIGN
+++ b/DESIGN
@@ -71,6 +71,169 @@ nodes that do not connect to link with such large MTUs.  Currently, Open
 vSwitch doesn't process jumbograms.
 
 
+In-Band Control
+===============
+
+In-band control allows a single network to be used for OpenFlow traffic and
+other data traffic.  See ovs-vswitchd.conf.db(5) for a description of
+configuring in-band control.
+
+This comment is an attempt to describe how in-band control works at a
+wire- and implementation-level.  Correctly implementing in-band
+control has proven difficult due to its many subtleties, and has thus
+gone through many iterations.  Please read through and understand the
+reasoning behind the chosen rules before making modifications.
+
+In Open vSwitch, in-band control is implemented as "hidden" flows (in that
+they are not visible through OpenFlow) and at a higher priority than
+wildcarded flows can be set up by through OpenFlow.  This is done so that
+the OpenFlow controller cannot interfere with them and possibly break
+connectivity with its switches.  It is possible to see all flows, including
+in-band ones, with the ovs-appctl "bridge/dump-flows" command.
+
+The Open vSwitch implementation of in-band control can hide traffic to
+arbitrary "remotes", where each remote is one TCP port on one IP address.
+Currently the remotes are automatically configured as the in-band OpenFlow
+controllers plus the OVSDB managers, if any.  (The latter is a requirement
+because OVSDB managers are responsible for configuring OpenFlow controllers,
+so if the manager cannot be reached then OpenFlow cannot be reconfigured.)
+
+The following rules (with the OFPP_NORMAL action) are set up on any bridge
+that has any remotes:
+
+   (a) DHCP requests sent from the local port.
+   (b) ARP replies to the local port's MAC address.
+   (c) ARP requests from the local port's MAC address.
+
+In-band also sets up the following rules for each unique next-hop MAC
+address for the remotes' IPs (the "next hop" is either the remote
+itself, if it is on a local subnet, or the gateway to reach the remote):
+
+   (d) ARP replies to the next hop's MAC address.
+   (e) ARP requests from the next hop's MAC address.
+
+In-band also sets up the following rules for each unique remote IP address:
+
+   (f) ARP replies containing the remote's IP address as a target.
+   (g) ARP requests containing the remote's IP address as a source.
+
+In-band also sets up the following rules for each unique remote (IP,port)
+pair:
+
+   (h) TCP traffic to the remote's IP and port.
+   (i) TCP traffic from the remote's IP and port.
+
+The goal of these rules is to be as narrow as possible to allow a
+switch to join a network and be able to communicate with the
+remotes.  As mentioned earlier, these rules have higher priority
+than the controller's rules, so if they are too broad, they may
+prevent the controller from implementing its policy.  As such,
+in-band actively monitors some aspects of flow and packet processing
+so that the rules can be made more precise.
+
+In-band control monitors attempts to add flows into the datapath that
+could interfere with its duties.  The datapath only allows exact
+match entries, so in-band control is able to be very precise about
+the flows it prevents.  Flows that miss in the datapath are sent to
+userspace to be processed, so preventing these flows from being
+cached in the "fast path" does not affect correctness.  The only type
+of flow that is currently prevented is one that would prevent DHCP
+replies from being seen by the local port.  For example, a rule that
+forwarded all DHCP traffic to the controller would not be allowed,
+but one that forwarded to all ports (including the local port) would.
+
+As mentioned earlier, packets that miss in the datapath are sent to
+the userspace for processing.  The userspace has its own flow table,
+the "classifier", so in-band checks whether any special processing
+is needed before the classifier is consulted.  If a packet is a DHCP
+response to a request from the local port, the packet is forwarded to
+the local port, regardless of the flow table.  Note that this requires
+L7 processing of DHCP replies to determine whether the 'chaddr' field
+matches the MAC address of the local port.
+
+It is interesting to note that for an L3-based in-band control
+mechanism, the majority of rules are devoted to ARP traffic.  At first
+glance, some of these rules appear redundant.  However, each serves an
+important role.  First, in order to determine the MAC address of the
+remote side (controller or gateway) for other ARP rules, we must allow
+ARP traffic for our local port with rules (b) and (c).  If we are
+between a switch and its connection to the remote, we have to
+allow the other switch's ARP traffic to through.  This is done with
+rules (d) and (e), since we do not know the addresses of the other
+switches a priori, but do know the remote's or gateway's.  Finally,
+if the remote is running in a local guest VM that is not reached
+through the local port, the switch that is connected to the VM must
+allow ARP traffic based on the remote's IP address, since it will
+not know the MAC address of the local port that is sending the traffic
+or the MAC address of the remote in the guest VM.
+
+With a few notable exceptions below, in-band should work in most
+network setups.  The following are considered "supported' in the
+current implementation:
+
+   - Locally Connected.  The switch and remote are on the same
+     subnet.  This uses rules (a), (b), (c), (h), and (i).
+
+   - Reached through Gateway.  The switch and remote are on
+     different subnets and must go through a gateway.  This uses
+     rules (a), (b), (c), (h), and (i).
+
+   - Between Switch and Remote.  This switch is between another
+     switch and the remote, and we want to allow the other
+     switch's traffic through.  This uses rules (d), (e), (h), and
+     (i).  It uses (b) and (c) indirectly in order to know the MAC
+     address for rules (d) and (e).  Note that DHCP for the other
+     switch will not work unless an OpenFlow controller explicitly lets this
+     switch pass the traffic.
+
+   - Between Switch and Gateway.  This switch is between another
+     switch and the gateway, and we want to allow the other switch's
+     traffic through.  This uses the same rules and logic as the
+     "Between Switch and Remote" configuration described earlier.
+
+   - Remote on Local VM.  The remote is a guest VM on the
+     system running in-band control.  This uses rules (a), (b), (c),
+     (h), and (i).
+
+   - Remote on Local VM with Different Networks.  The remote
+     is a guest VM on the system running in-band control, but the
+     local port is not used to connect to the remote.  For
+     example, an IP address is configured on eth0 of the switch.  The
+     remote's VM is connected through eth1 of the switch, but an
+     IP address has not been configured for that port on the switch.
+     As such, the switch will use eth0 to connect to the remote,
+     and eth1's rules about the local port will not work.  In the
+     example, the switch attached to eth0 would use rules (a), (b),
+     (c), (h), and (i) on eth0.  The switch attached to eth1 would use
+     rules (f), (g), (h), and (i).
+
+The following are explicitly *not* supported by in-band control:
+
+   - Specify Remote by Name.  Currently, the remote must be
+     identified by IP address.  A naive approach would be to permit
+     all DNS traffic.  Unfortunately, this would prevent the
+     controller from defining any policy over DNS.  Since switches
+     that are located behind us need to connect to the remote,
+     in-band cannot simply add a rule that allows DNS traffic from
+     the local port.  The "correct" way to support this is to parse
+     DNS requests to allow all traffic related to a request for the
+     remote's name through.  Due to the potential security
+     problems and amount of processing, we decided to hold off for
+     the time-being.
+
+   - Differing Remotes for Switches.  All switches must know
+     the L3 addresses for all the remotes that other switches
+     may use, since rules need to be set up to allow traffic related
+     to those remotes through.  See rules (f), (g), (h), and (i).
+
+   - Differing Routes for Switches.  In order for the switch to
+     allow other switches to connect to a remote through a
+     gateway, it allows the gateway's traffic through with rules (d)
+     and (e).  If the routes to the remote differ for the two
+     switches, we will not know the MAC address of the alternate
+     gateway.
+
+
 Suggestions
 ===========
 
diff --git a/ofproto/in-band.c b/ofproto/in-band.c
index e75d19eac..9aa03afc0 100644
--- a/ofproto/in-band.c
+++ b/ofproto/in-band.c
@@ -40,166 +40,6 @@
 
 VLOG_DEFINE_THIS_MODULE(in_band);
 
-/* In-band control allows a single network to be used for OpenFlow traffic and
- * other data traffic.  See ovs-vswitchd.conf.db(5) for a description of
- * configuring in-band control.
- *
- * This comment is an attempt to describe how in-band control works at a
- * wire- and implementation-level.  Correctly implementing in-band
- * control has proven difficult due to its many subtleties, and has thus
- * gone through many iterations.  Please read through and understand the
- * reasoning behind the chosen rules before making modifications.
- *
- * In Open vSwitch, in-band control is implemented as "hidden" flows (in that
- * they are not visible through OpenFlow) and at a higher priority than
- * wildcarded flows can be set up by through OpenFlow.  This is done so that
- * the OpenFlow controller cannot interfere with them and possibly break
- * connectivity with its switches.  It is possible to see all flows, including
- * in-band ones, with the ovs-appctl "bridge/dump-flows" command.
- *
- * The Open vSwitch implementation of in-band control can hide traffic to
- * arbitrary "remotes", where each remote is one TCP port on one IP address.
- * Currently the remotes are automatically configured as the in-band OpenFlow
- * controllers plus the OVSDB managers, if any.  (The latter is a requirement
- * because OVSDB managers are responsible for configuring OpenFlow controllers,
- * so if the manager cannot be reached then OpenFlow cannot be reconfigured.)
- *
- * The following rules (with the OFPP_NORMAL action) are set up on any bridge
- * that has any remotes:
- *
- *    (a) DHCP requests sent from the local port.
- *    (b) ARP replies to the local port's MAC address.
- *    (c) ARP requests from the local port's MAC address.
- *
- * In-band also sets up the following rules for each unique next-hop MAC
- * address for the remotes' IPs (the "next hop" is either the remote
- * itself, if it is on a local subnet, or the gateway to reach the remote):
- *
- *    (d) ARP replies to the next hop's MAC address.
- *    (e) ARP requests from the next hop's MAC address.
- *
- * In-band also sets up the following rules for each unique remote IP address:
- *
- *    (f) ARP replies containing the remote's IP address as a target.
- *    (g) ARP requests containing the remote's IP address as a source.
- *
- * In-band also sets up the following rules for each unique remote (IP,port)
- * pair:
- *
- *    (h) TCP traffic to the remote's IP and port.
- *    (i) TCP traffic from the remote's IP and port.
- *
- * The goal of these rules is to be as narrow as possible to allow a
- * switch to join a network and be able to communicate with the
- * remotes.  As mentioned earlier, these rules have higher priority
- * than the controller's rules, so if they are too broad, they may
- * prevent the controller from implementing its policy.  As such,
- * in-band actively monitors some aspects of flow and packet processing
- * so that the rules can be made more precise.
- *
- * In-band control monitors attempts to add flows into the datapath that
- * could interfere with its duties.  The datapath only allows exact
- * match entries, so in-band control is able to be very precise about
- * the flows it prevents.  Flows that miss in the datapath are sent to
- * userspace to be processed, so preventing these flows from being
- * cached in the "fast path" does not affect correctness.  The only type
- * of flow that is currently prevented is one that would prevent DHCP
- * replies from being seen by the local port.  For example, a rule that
- * forwarded all DHCP traffic to the controller would not be allowed,
- * but one that forwarded to all ports (including the local port) would.
- *
- * As mentioned earlier, packets that miss in the datapath are sent to
- * the userspace for processing.  The userspace has its own flow table,
- * the "classifier", so in-band checks whether any special processing
- * is needed before the classifier is consulted.  If a packet is a DHCP
- * response to a request from the local port, the packet is forwarded to
- * the local port, regardless of the flow table.  Note that this requires
- * L7 processing of DHCP replies to determine whether the 'chaddr' field
- * matches the MAC address of the local port.
- *
- * It is interesting to note that for an L3-based in-band control
- * mechanism, the majority of rules are devoted to ARP traffic.  At first
- * glance, some of these rules appear redundant.  However, each serves an
- * important role.  First, in order to determine the MAC address of the
- * remote side (controller or gateway) for other ARP rules, we must allow
- * ARP traffic for our local port with rules (b) and (c).  If we are
- * between a switch and its connection to the remote, we have to
- * allow the other switch's ARP traffic to through.  This is done with
- * rules (d) and (e), since we do not know the addresses of the other
- * switches a priori, but do know the remote's or gateway's.  Finally,
- * if the remote is running in a local guest VM that is not reached
- * through the local port, the switch that is connected to the VM must
- * allow ARP traffic based on the remote's IP address, since it will
- * not know the MAC address of the local port that is sending the traffic
- * or the MAC address of the remote in the guest VM.
- *
- * With a few notable exceptions below, in-band should work in most
- * network setups.  The following are considered "supported' in the
- * current implementation:
- *
- *    - Locally Connected.  The switch and remote are on the same
- *      subnet.  This uses rules (a), (b), (c), (h), and (i).
- *
- *    - Reached through Gateway.  The switch and remote are on
- *      different subnets and must go through a gateway.  This uses
- *      rules (a), (b), (c), (h), and (i).
- *
- *    - Between Switch and Remote.  This switch is between another
- *      switch and the remote, and we want to allow the other
- *      switch's traffic through.  This uses rules (d), (e), (h), and
- *      (i).  It uses (b) and (c) indirectly in order to know the MAC
- *      address for rules (d) and (e).  Note that DHCP for the other
- *      switch will not work unless an OpenFlow controller explicitly lets this
- *      switch pass the traffic.
- *
- *    - Between Switch and Gateway.  This switch is between another
- *      switch and the gateway, and we want to allow the other switch's
- *      traffic through.  This uses the same rules and logic as the
- *      "Between Switch and Remote" configuration described earlier.
- *
- *    - Remote on Local VM.  The remote is a guest VM on the
- *      system running in-band control.  This uses rules (a), (b), (c),
- *      (h), and (i).
- *
- *    - Remote on Local VM with Different Networks.  The remote
- *      is a guest VM on the system running in-band control, but the
- *      local port is not used to connect to the remote.  For
- *      example, an IP address is configured on eth0 of the switch.  The
- *      remote's VM is connected through eth1 of the switch, but an
- *      IP address has not been configured for that port on the switch.
- *      As such, the switch will use eth0 to connect to the remote,
- *      and eth1's rules about the local port will not work.  In the
- *      example, the switch attached to eth0 would use rules (a), (b),
- *      (c), (h), and (i) on eth0.  The switch attached to eth1 would use
- *      rules (f), (g), (h), and (i).
- *
- * The following are explicitly *not* supported by in-band control:
- *
- *    - Specify Remote by Name.  Currently, the remote must be
- *      identified by IP address.  A naive approach would be to permit
- *      all DNS traffic.  Unfortunately, this would prevent the
- *      controller from defining any policy over DNS.  Since switches
- *      that are located behind us need to connect to the remote,
- *      in-band cannot simply add a rule that allows DNS traffic from
- *      the local port.  The "correct" way to support this is to parse
- *      DNS requests to allow all traffic related to a request for the
- *      remote's name through.  Due to the potential security
- *      problems and amount of processing, we decided to hold off for
- *      the time-being.
- *
- *    - Differing Remotes for Switches.  All switches must know
- *      the L3 addresses for all the remotes that other switches
- *      may use, since rules need to be set up to allow traffic related
- *      to those remotes through.  See rules (f), (g), (h), and (i).
- *
- *    - Differing Routes for Switches.  In order for the switch to
- *      allow other switches to connect to a remote through a
- *      gateway, it allows the gateway's traffic through with rules (d)
- *      and (e).  If the routes to the remote differ for the two
- *      switches, we will not know the MAC address of the alternate
- *      gateway.
- */
-
 /* Priorities used in classifier for in-band rules.  These values are higher
  * than any that may be set with OpenFlow, and "18" kind of looks like "IB".
  * The ordering of priorities is not important because all of the rules set up

From d93ca2a0fdde23ef3e61bc34a9809f6c38239c45 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Wed, 4 May 2011 15:47:27 -0700
Subject: [PATCH 28/40] ofp-util: Fix validation of OFPAT_SET_VLAN_PCP actions.

Found by sparse.
---
 lib/ofp-util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 5aa2b825e..ddac77270 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -1922,7 +1922,7 @@ check_action(const union ofp_action *a, unsigned int len,
         if (error) {
             return error;
         }
-        if (a->vlan_vid.vlan_vid & ~7) {
+        if (a->vlan_pcp.vlan_pcp & ~7) {
             return ofp_mkerr(OFPET_BAD_ACTION, OFPBAC_BAD_ARGUMENT);
         }
         return 0;

From daf2ebb30c2af023970172969366dea9ac787d7d Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 26 Apr 2011 19:58:19 -0700
Subject: [PATCH 29/40] xenserver: Use xe-switch-network-stack in RPM spec
 file.

The proper way to switch the networking back-end is to use the
"xe-switch-network-stack" command rather than directly modifying
"/etc/xensource/network.conf".  Use that method in the spec file.
---
 xenserver/openvswitch-xen.spec | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index 97bf99ca0..de702addb 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -226,12 +226,7 @@ done
 
 if [ "$1" = "1" ]; then    # $1 = 2 for upgrade
     # Configure system to use Open vSwitch
-    echo vswitch > /etc/xensource/network.conf
-
-    printf "\nYou MUST reboot the server NOW to complete the change to\n"
-    printf "Open vSwitch.  Attempts to modify networking on the server\n"
-    printf "or any hosted VM will fail until after the reboot and could\n"
-    printf "leave the server in a state requiring manual recovery.\n\n"
+    xe-switch-network-backend vswitch
 else
 
     mode=$(cat /etc/xensource/network.conf)
@@ -310,12 +305,7 @@ if [ "$1" = "0" ]; then     # $1 = 1 for upgrade
     rmdir -p /usr/lib/openvswitch/xs-saved 2>/dev/null
 
     # Configure system to use bridge
-    echo bridge > /etc/xensource/network.conf
-
-    printf "\nYou MUST reboot the server now to complete the change to\n"
-    printf "standard Xen networking.  Attempts to modify networking on the\n"
-    printf "server or any hosted VM will fail until after the reboot and\n"
-    printf "could leave the server in a state requiring manual recovery.\n\n"
+    xe-switch-network-backend bridge
 fi
 
 %files

From 752f2093e1b1f591744f8084c90eac7073dfdb87 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Tue, 3 May 2011 23:16:46 -0700
Subject: [PATCH 30/40] xenserver: Better document scriplet action in RPM spec
 file.

---
 xenserver/openvswitch-xen.spec | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index de702addb..7409fdfc9 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -224,10 +224,10 @@ for s in openvswitch openvswitch-xapi-update; do
     chkconfig $s on || printf "Could not enable $s init script."
 done
 
-if [ "$1" = "1" ]; then    # $1 = 2 for upgrade
+if [ "$1" = "1" ]; then    # $1 = 1 for install
     # Configure system to use Open vSwitch
     xe-switch-network-backend vswitch
-else
+else    # $1 = 2 for upgrade
 
     mode=$(cat /etc/xensource/network.conf)
     if [ "$mode" != "vswitch" ] && [ "$mode" != "openvswitch" ]; then
@@ -247,7 +247,7 @@ fi
 depmod %{xen_version}
 
 %preun
-if [ "$1" = "0" ]; then     # $1 = 1 for upgrade
+if [ "$1" = "0" ]; then     # $1 = 0 for uninstall
     for s in openvswitch openvswitch-xapi-update; do
         chkconfig --del $s || printf "Could not remove $s init script."
     done
@@ -285,7 +285,7 @@ do
     fi
 done
 
-if [ "$1" = "0" ]; then     # $1 = 1 for upgrade
+if [ "$1" = "0" ]; then     # $1 = 0 for uninstall
     rm -f /usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.pyc \
         /usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.pyo
 

From 75135fa0b5dcc4042956a849e667fd71fab5c741 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Thu, 5 May 2011 14:27:38 -0700
Subject: [PATCH 31/40] bond: Convert stb_id to 32bit parameter.

The 16 bits currently in use is artificially restrictive.
---
 lib/bond.c | 4 ++--
 lib/bond.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/bond.c b/lib/bond.c
index d7242fae2..2b540523e 100644
--- a/lib/bond.c
+++ b/lib/bond.c
@@ -74,7 +74,7 @@ struct bond_slave {
     uint64_t tx_bytes;          /* Sum across 'tx_bytes' of entries. */
 
     /* BM_STABLE specific bonding info. */
-    uint16_t stb_id;            /* ID used for 'stb_slaves' ordering. */
+    uint32_t stb_id;            /* ID used for 'stb_slaves' ordering. */
 };
 
 /* A bond, that is, a set of network devices grouped to improve performance or
@@ -351,7 +351,7 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
  * 'slave_' or destroying 'bond'.
  */
 void
-bond_slave_register(struct bond *bond, void *slave_, uint16_t stb_id,
+bond_slave_register(struct bond *bond, void *slave_, uint32_t stb_id,
                     struct netdev *netdev)
 {
     struct bond_slave *slave = bond_slave_lookup(bond, slave_);
diff --git a/lib/bond.h b/lib/bond.h
index 5847abc10..c2efb8b02 100644
--- a/lib/bond.h
+++ b/lib/bond.h
@@ -75,7 +75,7 @@ void bond_destroy(struct bond *);
 
 bool bond_reconfigure(struct bond *, const struct bond_settings *);
 void bond_slave_register(struct bond *, void *slave_,
-                         uint16_t stable_id, struct netdev *);
+                         uint32_t stable_id, struct netdev *);
 void bond_slave_unregister(struct bond *, const void *slave);
 
 void bond_run(struct bond *, struct tag_set *, bool lacp_negotiated);

From 632e2b95c94099b78a1a3550b865898704bc8b97 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Thu, 5 May 2011 16:01:11 -0700
Subject: [PATCH 32/40] bond: Create new "bond-stable-id".

Stable bonding mode needs an ID to guarantee consistent slave
selection decisions across ovs-vswitchd instances.  Before this
patch, we used the lacp-port-id for this purpose.  However, LACP
places restrictions on how lacp-port-ids can be allocated which may
be inconvenient.  This patch creates a special purpose
bond-stable-id other_config setting which allows users to tweak
this value directly.
---
 vswitchd/bridge.c          | 12 +++++++++---
 vswitchd/vswitch.ovsschema |  4 ++--
 vswitchd/vswitch.xml       | 15 +++++++++++----
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index b2dc55fb8..09fc37d02 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3288,9 +3288,15 @@ port_reconfigure_bond(struct port *port)
     }
 
     LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
-        uint16_t stable_id = (port->lacp
-                              ? lacp_slave_get_port_id(port->lacp, iface)
-                              : iface->dp_ifidx);
+        long long stable_id;
+
+        stable_id = atoll(get_interface_other_config(iface->cfg,
+                                                     "bond-stable-id", "0"));
+
+        if (stable_id <= 0 || stable_id >= UINT32_MAX) {
+            stable_id = odp_port_to_ofp_port(iface->dp_ifidx);
+        }
+
         bond_slave_register(iface->port->bond, iface, stable_id,
                             iface->netdev);
     }
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index c5255dd9e..56c77ac60 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "3.4.0",
- "cksum": "3926696578 15276",
+ "version": "3.4.1",
+ "cksum": "7815264 15276",
  "tables": {
    "Open_vSwitch": {
      "columns": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 1db89dc5b..0f455000b 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -538,10 +538,10 @@
             balancing is done.  Uses a similar hashing strategy to
             <code>balance-tcp</code>, falling back to <code>balance-slb</code>
             style hashing when LACP negotiations are unsuccessful.</p>
-          <p>Slave selection decisions are made based on LACP port ID when LACP
-            negotiations are successful, falling back to openflow port number
-            when unsuccessful.  Thus, decisions are consistent across all
-            ovs-vswitchd instances with equivalent port IDs.</p>
+          <p>Slave selection decisions are made based on
+            <code>bond-stable-id</code> if set.  Otherwise, OpenFlow port
+            number is used.  Decisions are consistent across all ovs-vswitchd
+            instances with equivalent <code>bond-stable-id</code>s.</p>
         </dd>
       </dl>
 
@@ -1327,6 +1327,13 @@
       <column name="other_config">
         Key-value pairs for rarely used interface features.
         <dl>
+          <dt><code>bond-stable-id</code></dt>
+          <dd> A positive integer using in <code>stable</code> bond mode to
+            make slave selection decisions.  Allocating
+            <code>bond-stable-id</code>s consistently across interfaces
+            participating in a bond will guarantee consistent slave selection
+            decisions across ovs-vswitchd instances when using
+            <code>stable</code> bonding mode.</dd>
           <dt><code>lacp-port-id</code></dt>
           <dd> The LACP port ID of this <ref table="Interface"/>.  Port IDs are
             used in LACP negotiations to identify individual ports

From cc8d12f9364760456c86e92378d529c0dd9aaa66 Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Thu, 5 May 2011 16:52:56 -0700
Subject: [PATCH 33/40] lacp: New "lacp-heartbeat" mode.

This commit creates a new heartbeat mode for LACP.  This mode
treats LACP as a protocol simply for monitoring link status.  It
strips out most of the sanity checks built into the protocol.
Addition of this mode makes "lacp-force-aggregatable" and
"lacp-strict" options obsolete so they are removed.
---
 lib/lacp.c                 | 25 +++++++++++++------------
 lib/lacp.h                 |  3 +--
 vswitchd/bridge.c          |  8 ++------
 vswitchd/vswitch.ovsschema |  4 ++--
 vswitchd/vswitch.xml       | 13 ++++---------
 5 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/lib/lacp.c b/lib/lacp.c
index 87f70bf77..3fe5eff32 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -50,10 +50,9 @@ struct lacp {
 
     enum lacp_time lacp_time;  /* Fast, Slow or Custom LACP time. */
     long long int custom_time; /* LACP_TIME_CUSTOM transmission rate. */
-    bool strict;             /* True if in strict mode. */
     bool negotiated;         /* True if LACP negotiations were successful. */
     bool update;             /* True if lacp_update() needs to be called. */
-    bool force_agg;          /* Forces LACP_STATE_AGG bit on all slaves. */
+    bool heartbeat;          /* LACP heartbeat mode. */
 };
 
 struct slave {
@@ -183,16 +182,15 @@ lacp_configure(struct lacp *lacp, const struct lacp_settings *s)
 
     if (!eth_addr_equals(lacp->sys_id, s->id)
         || lacp->sys_priority != s->priority
-        || lacp->strict != s->strict) {
+        || lacp->heartbeat != s->heartbeat) {
         memcpy(lacp->sys_id, s->id, ETH_ADDR_LEN);
         lacp->sys_priority = s->priority;
-        lacp->strict = s->strict;
+        lacp->heartbeat = s->heartbeat;
         lacp->update = true;
     }
 
     lacp->active = s->active;
     lacp->lacp_time = s->lacp_time;
-    lacp->force_agg = s->force_agg;
     lacp->custom_time = MAX(TIME_UPDATE_INTERVAL, s->custom_time);
 }
 
@@ -431,6 +429,13 @@ lacp_update_attached(struct lacp *lacp)
     struct lacp_info lead_pri;
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
 
+    if (lacp->heartbeat) {
+        HMAP_FOR_EACH (slave, node, &lacp->slaves) {
+            slave->attached = slave->status != LACP_DEFAULTED;
+        }
+        return;
+    }
+
     lacp->update = false;
 
     lead = NULL;
@@ -471,10 +476,6 @@ lacp_update_attached(struct lacp *lacp)
                 slave->attached = false;
             }
         }
-    } else if (lacp->strict) {
-        HMAP_FOR_EACH (slave, node, &lacp->slaves) {
-            slave->attached = false;
-        }
     }
 }
 
@@ -555,7 +556,7 @@ slave_get_actor(struct slave *slave, struct lacp_info *actor)
         state |= LACP_STATE_EXP;
     }
 
-    if (lacp->force_agg || hmap_count(&lacp->slaves) > 1) {
+    if (lacp->heartbeat || hmap_count(&lacp->slaves) > 1) {
         state |= LACP_STATE_AGG;
     }
 
@@ -715,8 +716,8 @@ lacp_unixctl_show(struct unixctl_conn *conn,
     ds_put_format(&ds, "lacp: %s\n", lacp->name);
 
     ds_put_format(&ds, "\tstatus: %s", lacp->active ? "active" : "passive");
-    if (lacp->strict) {
-        ds_put_cstr(&ds, " strict");
+    if (lacp->heartbeat) {
+        ds_put_cstr(&ds, " heartbeat");
     }
     if (lacp->negotiated) {
         ds_put_cstr(&ds, " negotiated");
diff --git a/lib/lacp.h b/lib/lacp.h
index fb91b4f14..0fb797e89 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -88,8 +88,7 @@ struct lacp_settings {
     bool active;
     enum lacp_time lacp_time;
     long long int custom_time;
-    bool strict;
-    bool force_agg;
+    bool heartbeat;
 };
 
 void lacp_init(void);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 09fc37d02..d883596de 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3196,12 +3196,8 @@ port_reconfigure_lacp(struct port *port)
                   ? priority
                   : UINT16_MAX - !list_is_short(&port->ifaces));
 
-    s.strict = !strcmp(get_port_other_config(port->cfg, "lacp-strict",
-                                             "false"),
-                       "true");
-
-    s.force_agg = !strcmp(get_port_other_config(port->cfg,
-                                                "lacp-force-aggregatable",
+    s.heartbeat = !strcmp(get_port_other_config(port->cfg,
+                                                "lacp-heartbeat",
                                                 "false"), "true");
 
     lacp_time = get_port_other_config(port->cfg, "lacp-time", "slow");
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 56c77ac60..96be69f73 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "3.4.1",
- "cksum": "7815264 15276",
+ "version": "3.4.2",
+ "cksum": "976911089 15276",
  "tables": {
    "Open_vSwitch": {
      "columns": {
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 0f455000b..a16c486cc 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -678,15 +678,10 @@
               something other than <code>fast</code> or <code>slow</code> is
               not supported by the LACP specification.</p>
           </dd>
-          <dt><code>lacp-strict</code></dt>
-          <dd> When <code>true</code>, configures this <ref table="Port"/> to
-            require successful LACP negotiations to enable any slaves.
-            Defaults to <code>false</code> which safely allows LACP to be used
-            with switches that do not support the protocol.</dd>
-          <dt><code>lacp-force-aggregatable</code></dt>
-          <dd> When <code>true</code>, forces all slaves managed by this
-            <ref table="Port"/> to advertise themselves as aggregatable even if
-            they normally wouldn't.  Defaults to <code>false</code>.</dd>
+          <dt><code>lacp-heartbeat</code></dt>
+          <dd> Treats LACP like a simple heartbeat protocol for link state
+            monitoring.  Most features of the LACP protocol are disabled when
+            this mode is in use.</dd>
         </dl>
       </column>
     </group>

From 6f1435fc8f77e925fbdb5e5dad91d61645330c6c Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Mon, 2 May 2011 13:15:59 -0700
Subject: [PATCH 34/40] ofproto: Resubmit statistics improperly account during
 failover.

In some cases, when a facet's actions change because it resubmits
into a different rule, it will account all packets it as accrued
in the datapath to the new rule.  Due to the algorithm we are
using, it is acceptable for a facet to miscount at most 1 second
worth of packets in this manner.  This patch implements the proper
behavior.

Generally speaking, when a facet is facet_put__() into the
datapath, the kernel returns the old flow's statistics so they may
be accounted for in user space.  These statistics are generally
pushed down to the relevant facet's resubmit children.  Before this
patch, facet_put__() did not compensate for the fact that many of
the statistics in the datapath may have been already pushed.
Thus the entire packet count stored in the datapath would be pushed
to its children instead of simply the packets which have accrued
since the last accounting.  This patch fixes the behavior by
subtracting already accounted for packets from the statistics
returned by the datapath.
---
 ofproto/ofproto.c | 46 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 445f0d2bd..4ed291437 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -268,6 +268,7 @@ static void facet_flush_stats(struct ofproto *, struct facet *);
 
 static void facet_make_actions(struct ofproto *, struct facet *,
                                const struct ofpbuf *packet);
+static void facet_reset_dp_stats(struct facet *, struct dpif_flow_stats *);
 static void facet_update_stats(struct ofproto *, struct facet *,
                                const struct dpif_flow_stats *);
 static void facet_push_stats(struct ofproto *, struct facet *);
@@ -1632,6 +1633,12 @@ facet_make_actions(struct ofproto *p, struct facet *facet,
     ofpbuf_delete(odp_actions);
 }
 
+/* Updates 'facet''s flow in the datapath setting its actions to 'actions_len'
+ * bytes of actions in 'actions'.  If 'stats' is non-null, statistics counters
+ * in the datapath will be zeroed and 'stats' will be updated with traffic new
+ * since 'facet' was last updated.
+ *
+ * Returns 0 if successful, otherwise a positive errno value.*/
 static int
 facet_put__(struct ofproto *ofproto, struct facet *facet,
             const struct nlattr *actions, size_t actions_len,
@@ -1640,19 +1647,24 @@ facet_put__(struct ofproto *ofproto, struct facet *facet,
     struct odputil_keybuf keybuf;
     enum dpif_flow_put_flags flags;
     struct ofpbuf key;
+    int ret;
 
     flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
     if (stats) {
         flags |= DPIF_FP_ZERO_STATS;
-        facet->dp_packet_count = 0;
-        facet->dp_byte_count = 0;
     }
 
     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
     odp_flow_key_from_flow(&key, &facet->flow);
 
-    return dpif_flow_put(ofproto->dpif, flags, key.data, key.size,
-                         actions, actions_len, stats);
+    ret = dpif_flow_put(ofproto->dpif, flags, key.data, key.size,
+                        actions, actions_len, stats);
+
+    if (stats) {
+        facet_reset_dp_stats(facet, stats);
+    }
+
+    return ret;
 }
 
 /* If 'facet' is installable, inserts or re-inserts it into 'p''s datapath.  If
@@ -1696,16 +1708,18 @@ facet_uninstall(struct ofproto *p, struct facet *facet)
         struct odputil_keybuf keybuf;
         struct dpif_flow_stats stats;
         struct ofpbuf key;
+        int error;
 
         ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
         odp_flow_key_from_flow(&key, &facet->flow);
 
-        if (!dpif_flow_del(p->dpif, key.data, key.size, &stats)) {
+        error = dpif_flow_del(p->dpif, key.data, key.size, &stats);
+        facet_reset_dp_stats(facet, &stats);
+        if (!error) {
             facet_update_stats(p, facet, &stats);
         }
+
         facet->installed = false;
-        facet->dp_packet_count = 0;
-        facet->dp_byte_count = 0;
     } else {
         assert(facet->dp_packet_count == 0);
         assert(facet->dp_byte_count == 0);
@@ -1724,6 +1738,24 @@ facet_is_controller_flow(struct facet *facet)
                                       htons(OFPP_CONTROLLER)));
 }
 
+/* Resets 'facet''s datapath statistics counters.  This should be called when
+ * 'facet''s statistics are cleared in the datapath.  If 'stats' is non-null,
+ * it should contain the statistics returned by dpif when 'facet' was reset in
+ * the datapath.  'stats' will be modified to only included statistics new
+ * since 'facet' was last updated. */
+static void
+facet_reset_dp_stats(struct facet *facet, struct dpif_flow_stats *stats)
+{
+    if (stats && facet->dp_packet_count < stats->n_packets
+        && facet->dp_byte_count < stats->n_bytes) {
+        stats->n_packets -= facet->dp_packet_count;
+        stats->n_bytes -= facet->dp_byte_count;
+    }
+
+    facet->dp_packet_count = 0;
+    facet->dp_byte_count = 0;
+}
+
 /* Folds all of 'facet''s statistics into its rule.  Also updates the
  * accounting ofhook and emits a NetFlow expiration if appropriate.  All of
  * 'facet''s statistics in the datapath should have been zeroed and folded into

From daa8bd2bf878640556195016df8c5ea2f812f9cc Mon Sep 17 00:00:00 2001
From: Ethan Jackson <ethan@nicira.com>
Date: Fri, 6 May 2011 17:02:02 -0700
Subject: [PATCH 35/40] bridge: Don't configure QoS without Queues.

It doesn't make sense to create a QoS object without any queues.
Before this patch, OVS would configure the QoS object and as a
result drop all traffic going through the affected interface.  With
this patch, OVS will simply clear QoS configuration on the
interface.

Bug #5583.
---
 vswitchd/bridge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d883596de..56943f671 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -3534,7 +3534,7 @@ iface_delete_queues(unsigned int queue_id,
 static void
 iface_update_qos(struct iface *iface, const struct ovsrec_qos *qos)
 {
-    if (!qos || qos->type[0] == '\0') {
+    if (!qos || qos->type[0] == '\0' || qos->n_queues < 1) {
         netdev_set_qos(iface->netdev, NULL, NULL);
     } else {
         struct iface_delete_queues_cbdata cbdata;

From 6d5abe94343a5c5e8f00efe2871d9825091024eb Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Mon, 9 May 2011 10:29:51 -0700
Subject: [PATCH 36/40] ovs-vsctl: Issue warning for likely erroneous "get"
 commands.

Suggested-by: Reid Price <reid@nicira.com>
Feature #5527.
---
 utilities/ovs-vsctl.8.in |  5 +++++
 utilities/ovs-vsctl.c    | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/utilities/ovs-vsctl.8.in b/utilities/ovs-vsctl.8.in
index 0b3e164ef..fcfec0246 100644
--- a/utilities/ovs-vsctl.8.in
+++ b/utilities/ovs-vsctl.8.in
@@ -596,6 +596,11 @@ For a map column, without \fB\-\-if\-exists\fR it is an error if
 If \fB@\fIname\fR is specified, then the UUID for \fIrecord\fR may be
 referred to by that name later in the same \fBovs\-vsctl\fR
 invocation in contexts where a UUID is expected.
+.IP
+Both \fB\-\-id\fR and the \fIcolumn\fR arguments are optional, but
+usually at least one or the other should be specified.  If both are
+omitted, then \fBget\fR has no effect except to verify that
+\fIrecord\fR exists in \fItable\fR.
 .
 .IP "\fBset \fItable record column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..."
 Sets the value of each specified \fIcolumn\fR in the given
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 2c1ba6df2..6c2fbece7 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -26,6 +26,7 @@
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 #include "command-line.h"
 #include "compiler.h"
@@ -2547,10 +2548,20 @@ pre_parse_column_key_value(struct vsctl_context *ctx,
 static void
 pre_cmd_get(struct vsctl_context *ctx)
 {
+    const char *id = shash_find_data(&ctx->options, "--id");
     const char *table_name = ctx->argv[1];
     const struct vsctl_table_class *table;
     int i;
 
+    /* Using "get" without --id or a column name could possibly make sense.
+     * Maybe, for example, a ovs-vsctl run wants to assert that a row exists.
+     * But it is unlikely that an interactive user would want to do that, so
+     * issue a warning if we're running on a terminal. */
+    if (!id && ctx->argc <= 3 && isatty(STDOUT_FILENO)) {
+        VLOG_WARN("\"get\" command without row arguments or \"--id\" is "
+                  "possibly erroneous");
+    }
+
     table = pre_get_table(ctx, table_name);
     for (i = 3; i < ctx->argc; i++) {
         if (!strcasecmp(ctx->argv[i], "_uuid")

From be55976089659d082834aae58acd1173f10004e7 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 10 May 2011 09:15:44 -0700
Subject: [PATCH 37/40] INSTALL.XenServer: Document Open vSwitch boot process
 on XenServer.

Inspired by a conversation with David Erickson <derickso@stanford.edu>.
---
 INSTALL.XenServer | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/INSTALL.XenServer b/INSTALL.XenServer
index 48f81785f..9d9012b86 100644
--- a/INSTALL.XenServer
+++ b/INSTALL.XenServer
@@ -69,6 +69,74 @@ To uninstall Open vSwitch from a XenServer host, remove the packages:
 After installing or uninstalling Open vSwitch, the XenServer should be
 rebooted as soon as possible.
 
+Open vSwitch Boot Sequence on XenServer
+---------------------------------------
+
+When Open vSwitch is installed on XenServer, its startup script
+/etc/init.d/openvswitch runs early in boot.  It does roughly the
+following:
+
+        * Loads the OVS kernel module, openvswitch_mod.
+
+        * Starts ovsdb-server, the OVS configuration database.
+
+        * XenServer expects there to be no bridges configured at
+          startup, but the OVS configuration database likely still has
+          bridges configured from before reboot.  To match XenServer
+          expectations, the startup script deletes all configured
+          bridges from the database.
+
+        * Starts ovs-vswitchd, the OVS switching daemon.
+
+At this point in the boot process, then, there are no Open vSwitch
+bridges, even though all of the Open vSwitch daemons are running.
+Later on in boot, /etc/init.d/management-interface (part of XenServer,
+not Open vSwitch) creates the bridge for the XAPI management interface
+by invoking /opt/xensource/libexec/interface-reconfigure.  Normally
+this program consults XAPI's database to obtain information about how
+to configure the bridge, but XAPI is not running yet[*] so it instead
+consults /var/xapi/network.dbcache, which is a cached copy of the most
+recent network configuration.
+
+[*] Even if XAPI were running, if this XenServer node is a pool slave
+    then the query would have to consult the master, which requires
+    network access, which begs the question of how to configure the
+    management interface.
+
+XAPI starts later on in the boot process.  XAPI can then create other
+bridges on demand using /opt/xensource/libexec/interface-reconfigure.
+Now that XAPI is running, that program consults XAPI directly instead
+of reading the cache.
+
+As part of its own startup, XAPI invokes the Open vSwitch XAPI plugin
+script /etc/xapi.d/openvswitch-cfg-update passing the "update"
+command.  The plugin script does roughly the following:
+
+        * Calls /opt/xensource/libexec/interface-reconfigure with the
+          "rewrite" command, to ensure that the network cache is
+          up-to-date.
+
+        * Queries the Open vSwitch manager setting (named
+          "vswitch_controller") from the XAPI database for the
+          XenServer pool.
+
+        * If XAPI and OVS are configured for different managers, or if
+          OVS is configured for a manager but XAPI is not, runs
+          "ovs-vsctl emer-reset" to bring the Open vSwitch
+          configuration to a known state.  One effect of emer-reset is
+          to deconfigure any manager from the OVS database.
+
+        * If XAPI is configured for a manger, configures the OVS
+          manager to match with "ovs-vsctl set-manager".
+
+The Open vSwitch boot sequence only configures an OVS configuration
+database manager.  There is no way to directly configure an OpenFlow
+controller on XenServer and, as a consequence of the step above that
+deletes all of the bridges at boot time, controller configuration only
+persists until XenServer reboot.  The configuration database manager
+can, however, configure controllers for bridges.  See the BUGS section
+of ovs-controller(8) for more information on this topic.
+
 Reporting Bugs
 --------------
 

From bf8f2167fd3107f5513d487a69a6568cf51afd68 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 10 May 2011 09:17:37 -0700
Subject: [PATCH 38/40] stream-ssl: Improve messages when configuring SSL if it
 is unsupported.

Previously, if --private-key or another option that requires SSL support
was used, but OVS was built without OpenSSL support, then OVS would fail
with an error message that the specified option was not supported.  This
confused users because it made them think that the option had been removed:
    http://openvswitch.org/pipermail/discuss/2011-April/005034.html

This commit improves the error message: OVS will now report that it was
built without SSL support.  This should be make the problem clear to users.

Reported-by: Aaron Rosen <arosen@clemson.edu>
Feature #5325.
---
 lib/automake.mk            |  2 +
 lib/stream-nossl.c         | 76 ++++++++++++++++++++++++++++++++++++++
 lib/stream-ssl.h           | 26 ++-----------
 ovsdb/ovsdb-client.c       |  6 +--
 ovsdb/ovsdb-server.c       |  8 ----
 tests/test-jsonrpc.c       |  6 +--
 utilities/ovs-controller.c |  6 +--
 utilities/ovs-ofctl.c      |  2 +-
 utilities/ovs-openflowd.c  |  6 +--
 utilities/ovs-vsctl.c      |  6 +--
 vswitchd/bridge.c          |  4 +-
 vswitchd/ovs-vswitchd.c    |  7 +---
 12 files changed, 92 insertions(+), 63 deletions(-)
 create mode 100644 lib/stream-nossl.c

diff --git a/lib/automake.mk b/lib/automake.mk
index 7c1977f80..efc1fd7b0 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -220,6 +220,8 @@ lib/dhparams.c: lib/dh1024.pem lib/dh2048.pem lib/dh4096.pem
 	 openssl dhparam -C -in $(srcdir)/lib/dh4096.pem -noout)	\
 	| sed 's/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp
 	mv lib/dhparams.c.tmp lib/dhparams.c
+else
+lib_libopenvswitch_a_SOURCES += lib/stream-nossl.c
 endif
 
 EXTRA_DIST += \
diff --git a/lib/stream-nossl.c b/lib/stream-nossl.c
new file mode 100644
index 000000000..cdbbf5d7a
--- /dev/null
+++ b/lib/stream-nossl.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2011 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "stream-ssl.h"
+#include "vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(stream_nossl);
+
+/* Dummy function definitions, used when OVS is built without OpenSSL. */
+
+bool
+stream_ssl_is_configured(void)
+{
+    return false;
+}
+
+static void NO_RETURN
+nossl_option(const char *detail)
+{
+    VLOG_FATAL("%s specified but Open vSwitch was built without SSL support",
+               detail);
+}
+
+void
+stream_ssl_set_private_key_file(const char *file_name)
+{
+    if (file_name != NULL) {
+        nossl_option("Private key");
+    }
+}
+
+void
+stream_ssl_set_certificate_file(const char *file_name)
+{
+    if (file_name != NULL) {
+        nossl_option("Certificate");
+    }
+}
+
+void
+stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap OVS_UNUSED)
+{
+    if (file_name != NULL) {
+        nossl_option("CA certificate");
+    }
+}
+
+void
+stream_ssl_set_peer_ca_cert_file(const char *file_name)
+{
+    if (file_name != NULL) {
+        nossl_option("Peer CA certificate");
+    }
+}
+
+void
+stream_ssl_set_key_and_cert(const char *private_key_file,
+                            const char *certificate_file)
+{
+    stream_ssl_set_private_key_file(private_key_file);
+    stream_ssl_set_certificate_file(certificate_file);
+}
diff --git a/lib/stream-ssl.h b/lib/stream-ssl.h
index 6bea577d3..29c3120fd 100644
--- a/lib/stream-ssl.h
+++ b/lib/stream-ssl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,28 +18,18 @@
 
 #include <stdbool.h>
 
-#ifdef HAVE_OPENSSL
 bool stream_ssl_is_configured(void);
-
 void stream_ssl_set_private_key_file(const char *file_name);
 void stream_ssl_set_certificate_file(const char *file_name);
 void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap);
-
+void stream_ssl_set_peer_ca_cert_file(const char *file_name);
 void stream_ssl_set_key_and_cert(const char *private_key_file,
                                  const char *certificate_file);
 
-
-void stream_ssl_set_peer_ca_cert_file(const char *file_name);
-
-/* Define the long options for SSL support.
- *
- * Note that the definition includes a final comma, and therefore a comma
- * must not be supplied when using the definition.  This is done so that
- * compilation succeeds whether or not HAVE_OPENSSL is defined. */
-#define STREAM_SSL_LONG_OPTIONS                      \
+#define STREAM_SSL_LONG_OPTIONS                     \
         {"private-key", required_argument, 0, 'p'}, \
         {"certificate", required_argument, 0, 'c'}, \
-        {"ca-cert",     required_argument, 0, 'C'},
+        {"ca-cert",     required_argument, 0, 'C'}
 
 #define STREAM_SSL_OPTION_HANDLERS                      \
         case 'p':                                       \
@@ -53,13 +43,5 @@ void stream_ssl_set_peer_ca_cert_file(const char *file_name);
         case 'C':                                       \
             stream_ssl_set_ca_cert_file(optarg, false); \
             break;
-#else /* !HAVE_OPENSSL */
-static inline bool stream_ssl_is_configured(void)
-{
-    return false;
-}
-#define STREAM_SSL_LONG_OPTIONS
-#define STREAM_SSL_OPTION_HANDLERS
-#endif /* !HAVE_OPENSSL */
 
 #endif /* stream-ssl.h */
diff --git a/ovsdb/ovsdb-client.c b/ovsdb/ovsdb-client.c
index a66b013bf..e8afdd6b3 100644
--- a/ovsdb/ovsdb-client.c
+++ b/ovsdb/ovsdb-client.c
@@ -80,9 +80,9 @@ parse_options(int argc, char *argv[])
         DAEMON_LONG_OPTIONS,
 #ifdef HAVE_OPENSSL
         {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
-        TABLE_LONG_OPTIONS,
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
 #endif
+        TABLE_LONG_OPTIONS,
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -111,13 +111,11 @@ parse_options(int argc, char *argv[])
 
         TABLE_OPTION_HANDLERS(&table_style)
 
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_BOOTSTRAP_CA_CERT:
             stream_ssl_set_ca_cert_file(optarg, true);
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index c9b0fdd95..14f0fbfb7 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -51,13 +51,11 @@
 
 VLOG_DEFINE_THIS_MODULE(ovsdb_server);
 
-#if HAVE_OPENSSL
 /* SSL configuration. */
 static char *private_key_file;
 static char *certificate_file;
 static char *ca_cert_file;
 static bool bootstrap_ca_cert;
-#endif
 
 static unixctl_cb_func ovsdb_server_exit;
 static unixctl_cb_func ovsdb_server_compact;
@@ -598,13 +596,11 @@ reconfigure_from_db(struct ovsdb_jsonrpc_server *jsonrpc,
     ovsdb_jsonrpc_server_set_remotes(jsonrpc, &resolved_remotes);
     shash_destroy_free_data(&resolved_remotes);
 
-#if HAVE_OPENSSL
     /* Configure SSL. */
     stream_ssl_set_key_and_cert(query_db_string(db, private_key_file),
                                 query_db_string(db, certificate_file));
     stream_ssl_set_ca_cert_file(query_db_string(db, ca_cert_file),
                                 bootstrap_ca_cert);
-#endif
 }
 
 static void
@@ -671,12 +667,10 @@ parse_options(int argc, char *argv[], char **file_namep,
         DAEMON_LONG_OPTIONS,
         VLOG_LONG_OPTIONS,
         LEAK_CHECKER_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
         {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
         {"private-key", required_argument, 0, 'p'},
         {"certificate", required_argument, 0, 'c'},
         {"ca-cert",     required_argument, 0, 'C'},
-#endif
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -714,7 +708,6 @@ parse_options(int argc, char *argv[], char **file_namep,
         DAEMON_OPTION_HANDLERS
         LEAK_CHECKER_OPTION_HANDLERS
 
-#ifdef HAVE_OPENSSL
         case 'p':
             private_key_file = optarg;
             break;
@@ -732,7 +725,6 @@ parse_options(int argc, char *argv[], char **file_namep,
             ca_cert_file = optarg;
             bootstrap_ca_cert = true;
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/tests/test-jsonrpc.c b/tests/test-jsonrpc.c
index 12bbc975e..5d93850c0 100644
--- a/tests/test-jsonrpc.c
+++ b/tests/test-jsonrpc.c
@@ -60,10 +60,8 @@ parse_options(int argc, char *argv[])
         {"verbose", optional_argument, 0, 'v'},
         {"help", no_argument, 0, 'h'},
         DAEMON_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
         {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
-        STREAM_SSL_LONG_OPTIONS
-#endif
+        STREAM_SSL_LONG_OPTIONS,
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -84,13 +82,11 @@ parse_options(int argc, char *argv[])
 
         DAEMON_OPTION_HANDLERS
 
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_BOOTSTRAP_CA_CERT:
             stream_ssl_set_ca_cert_file(optarg, true);
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/utilities/ovs-controller.c b/utilities/ovs-controller.c
index ae0ea3da1..3844666fb 100644
--- a/utilities/ovs-controller.c
+++ b/utilities/ovs-controller.c
@@ -324,10 +324,8 @@ parse_options(int argc, char *argv[])
         {"version",     no_argument, 0, 'V'},
         DAEMON_LONG_OPTIONS,
         VLOG_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
         {"peer-ca-cert", required_argument, 0, OPT_PEER_CA_CERT},
-#endif
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -400,13 +398,11 @@ parse_options(int argc, char *argv[])
         VLOG_OPTION_HANDLERS
         DAEMON_OPTION_HANDLERS
 
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_PEER_CA_CERT:
             stream_ssl_set_peer_ca_cert_file(optarg);
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c
index c0ff4dc12..6c2ca1766 100644
--- a/utilities/ovs-ofctl.c
+++ b/utilities/ovs-ofctl.c
@@ -92,7 +92,7 @@ parse_options(int argc, char *argv[])
         {"help", no_argument, 0, 'h'},
         {"version", no_argument, 0, 'V'},
         VLOG_LONG_OPTIONS,
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
diff --git a/utilities/ovs-openflowd.c b/utilities/ovs-openflowd.c
index 86f5ca592..33ebc6815 100644
--- a/utilities/ovs-openflowd.c
+++ b/utilities/ovs-openflowd.c
@@ -277,10 +277,8 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
         DAEMON_LONG_OPTIONS,
         VLOG_LONG_OPTIONS,
         LEAK_CHECKER_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
         {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
-#endif
         {0, 0, 0, 0},
     };
     char *short_options = long_options_to_short_options(long_options);
@@ -448,13 +446,11 @@ parse_options(int argc, char *argv[], struct ofsettings *s)
 
         LEAK_CHECKER_OPTION_HANDLERS
 
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_BOOTSTRAP_CA_CERT:
             stream_ssl_set_ca_cert_file(optarg, true);
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index 6c2fbece7..851696650 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -217,10 +217,8 @@ parse_options(int argc, char *argv[])
         {"version", no_argument, 0, 'V'},
         VLOG_LONG_OPTIONS,
         TABLE_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
         {"peer-ca-cert", required_argument, 0, OPT_PEER_CA_CERT},
-#endif
         {0, 0, 0, 0},
     };
     char *tmp, *short_options;
@@ -278,13 +276,11 @@ parse_options(int argc, char *argv[])
         VLOG_OPTION_HANDLERS
         TABLE_OPTION_HANDLERS(&table_style)
 
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_PEER_CA_CERT:
             stream_ssl_set_peer_ca_cert_file(optarg);
             break;
-#endif
 
         case '?':
             exit(EXIT_FAILURE);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 56943f671..55c9f40af 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -1401,7 +1401,7 @@ bridge_run(void)
     /* (Re)configure if necessary. */
     database_changed = ovsdb_idl_run(idl);
     cfg = ovsrec_open_vswitch_first(idl);
-#ifdef HAVE_OPENSSL
+
     /* Re-configure SSL.  We do this on every trip through the main loop,
      * instead of just when the database changes, because the contents of the
      * key and certificate files can change without the database changing.
@@ -1414,7 +1414,7 @@ bridge_run(void)
         stream_ssl_set_key_and_cert(ssl->private_key, ssl->certificate);
         stream_ssl_set_ca_cert_file(ssl->ca_cert, ssl->bootstrap_ca_cert);
     }
-#endif
+
     if (database_changed || datapath_destroyed) {
         if (cfg) {
             struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(idl);
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index b9a246184..626cba4f6 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -128,11 +128,9 @@ parse_options(int argc, char *argv[])
         DAEMON_LONG_OPTIONS,
         VLOG_LONG_OPTIONS,
         LEAK_CHECKER_LONG_OPTIONS,
-#ifdef HAVE_OPENSSL
-        STREAM_SSL_LONG_OPTIONS
+        STREAM_SSL_LONG_OPTIONS,
         {"peer-ca-cert", required_argument, 0, OPT_PEER_CA_CERT},
         {"bootstrap-ca-cert", required_argument, 0, OPT_BOOTSTRAP_CA_CERT},
-#endif
         {"enable-dummy", no_argument, 0, OPT_ENABLE_DUMMY},
         {0, 0, 0, 0},
     };
@@ -168,8 +166,6 @@ parse_options(int argc, char *argv[])
         VLOG_OPTION_HANDLERS
         DAEMON_OPTION_HANDLERS
         LEAK_CHECKER_OPTION_HANDLERS
-
-#ifdef HAVE_OPENSSL
         STREAM_SSL_OPTION_HANDLERS
 
         case OPT_PEER_CA_CERT:
@@ -179,7 +175,6 @@ parse_options(int argc, char *argv[])
         case OPT_BOOTSTRAP_CA_CERT:
             stream_ssl_set_ca_cert_file(optarg, true);
             break;
-#endif
 
         case OPT_ENABLE_DUMMY:
             dummy_enable();

From f8d17df2b2600d00267d7fcea1f34615c3f3ef50 Mon Sep 17 00:00:00 2001
From: Justin Pettit <jpettit@nicira.com>
Date: Mon, 9 May 2011 23:30:07 -0700
Subject: [PATCH 39/40] xenserver: Fix bugs related to using
 xe-switch-network-backend in spec file.

Commit daf2ebb (xenserver: Use xe-switch-network-stack in RPM spec
file.) changed the spec file to use xe-switch-network-backend instead of
directly modifying "/etc/xensource/network.conf".  It incorrectly
assumed that the command was in the search path.  It also didn't take
into account that the command will remove the "openvswitch" service with
chkconfig.  This commit fixes those errors.

Signed-off-by: Justin Pettit <jpettit@nicira.com>
---
 xenserver/openvswitch-xen.spec | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec
index 7409fdfc9..9d686ce47 100644
--- a/xenserver/openvswitch-xen.spec
+++ b/xenserver/openvswitch-xen.spec
@@ -226,7 +226,7 @@ done
 
 if [ "$1" = "1" ]; then    # $1 = 1 for install
     # Configure system to use Open vSwitch
-    xe-switch-network-backend vswitch
+    /opt/xensource/bin/xe-switch-network-backend vswitch
 else    # $1 = 2 for upgrade
 
     mode=$(cat /etc/xensource/network.conf)
@@ -248,8 +248,15 @@ depmod %{xen_version}
 
 %preun
 if [ "$1" = "0" ]; then     # $1 = 0 for uninstall
+    # Configure system to use bridge
+    /opt/xensource/bin/xe-switch-network-backend bridge
+
+    # The "openvswitch" service should have been removed from
+    # "xe-switch-network-backend bridge".
     for s in openvswitch openvswitch-xapi-update; do
-        chkconfig --del $s || printf "Could not remove $s init script."
+        if chkconfig --list $s >/dev/null 2>&1; then
+            chkconfig --del $s || printf "Could not remove $s init script."
+        fi
     done
 fi
 
@@ -303,11 +310,10 @@ if [ "$1" = "0" ]; then     # $1 = 0 for uninstall
 
     # Remove saved XenServer scripts directory, but only if it's empty
     rmdir -p /usr/lib/openvswitch/xs-saved 2>/dev/null
-
-    # Configure system to use bridge
-    xe-switch-network-backend bridge
 fi
 
+exit 0
+
 %files
 %defattr(-,root,root)
 /etc/init.d/openvswitch

From 2db65bf72c008cf7ee658d0b44744b39495ead14 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 10 May 2011 11:48:36 -0700
Subject: [PATCH 40/40] datapath: Pull data into linear area only on demand.

We currently always pull 64 bytes of data (if it exists) into the
skb linear data area when parsing flows.  The theory behind this
is that the data should always be there and it's enough to parse
common flows.  However, this causes a number of problems in
different situations.  The first is that it is not enough to handle
IPv6 so we must pull additional data anyways.  However, the main
problem is that GRO typically allocates a new skb and puts just the
headers in there.  For a typical TCP/IPv4 packet there are 54 bytes
of headers, which means that we must possibly reallocate and copy
on every packet.  In addition, GRO creates frag_lists with this
specific geometry in order to allow later segmentation if the packet
is forwarded to a device that does not support frag_lists.  When
we pull additional data it changes the geometry and causes later
problems for the device.  This patch instead incrementally pulls
data, which avoids these problems.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ian Campbell <Ian.Campbell@citrix.com>
---
 datapath/flow.c | 118 +++++++++++++++++++++++-------------------------
 1 file changed, 57 insertions(+), 61 deletions(-)

diff --git a/datapath/flow.c b/datapath/flow.c
index d670925af..d678979b3 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -39,30 +39,36 @@
 static struct kmem_cache *flow_cache;
 static unsigned int hash_seed __read_mostly;
 
+static int check_header(struct sk_buff *skb, int len)
+{
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+	return 0;
+}
+
 static inline bool arphdr_ok(struct sk_buff *skb)
 {
-	return skb->len >= skb_network_offset(skb) + sizeof(struct arp_eth_header);
+	return pskb_may_pull(skb, skb_network_offset(skb) +
+				  sizeof(struct arp_eth_header));
 }
 
 static inline int check_iphdr(struct sk_buff *skb)
 {
 	unsigned int nh_ofs = skb_network_offset(skb);
 	unsigned int ip_len;
+	int err;
 
-	if (skb->len < nh_ofs + sizeof(struct iphdr))
-		return -EINVAL;
+	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
 
 	ip_len = ip_hdrlen(skb);
-	if (ip_len < sizeof(struct iphdr) || skb->len < nh_ofs + ip_len)
+	if (unlikely(ip_len < sizeof(struct iphdr) ||
+		     skb->len < nh_ofs + ip_len))
 		return -EINVAL;
 
-	/*
-	 * Pull enough header bytes to account for the IP header plus the
-	 * longest transport header that we parse, currently 20 bytes for TCP.
-	 */
-	if (!pskb_may_pull(skb, min(nh_ofs + ip_len + 20, skb->len)))
-		return -ENOMEM;
-
 	skb_set_transport_header(skb, nh_ofs + ip_len);
 	return 0;
 }
@@ -70,22 +76,29 @@ static inline int check_iphdr(struct sk_buff *skb)
 static inline bool tcphdr_ok(struct sk_buff *skb)
 {
 	int th_ofs = skb_transport_offset(skb);
-	if (skb->len >= th_ofs + sizeof(struct tcphdr)) {
-		int tcp_len = tcp_hdrlen(skb);
-		return (tcp_len >= sizeof(struct tcphdr)
-			&& skb->len >= th_ofs + tcp_len);
-	}
-	return false;
+	int tcp_len;
+
+	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+		return false;
+
+	tcp_len = tcp_hdrlen(skb);
+	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
+		     skb->len < th_ofs + tcp_len))
+		return false;
+
+	return true;
 }
 
 static inline bool udphdr_ok(struct sk_buff *skb)
 {
-	return skb->len >= skb_transport_offset(skb) + sizeof(struct udphdr);
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct udphdr));
 }
 
 static inline bool icmphdr_ok(struct sk_buff *skb)
 {
-	return skb->len >= skb_transport_offset(skb) + sizeof(struct icmphdr);
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmphdr));
 }
 
 u64 flow_used_time(unsigned long flow_jiffies)
@@ -108,9 +121,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 	int payload_ofs;
 	struct ipv6hdr *nh;
 	uint8_t nexthdr;
+	int err;
 
-	if (unlikely(skb->len < nh_ofs + sizeof(*nh)))
-		return -EINVAL;
+	err = check_header(skb, nh_ofs + sizeof(*nh));
+	if (unlikely(err))
+		return err;
 
 	nh = ipv6_hdr(skb);
 	nexthdr = nh->nexthdr;
@@ -126,15 +141,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 		return -EINVAL;
 
 	nh_len = payload_ofs - nh_ofs;
-
-	/* Pull enough header bytes to account for the IP header plus the
-	 * longest transport header that we parse, currently 20 bytes for TCP.
-	 * To dig deeper than the transport header, transport parsers may need
-	 * to pull more header bytes.
-	 */
-	if (unlikely(!pskb_may_pull(skb, min(nh_ofs + nh_len + 20, skb->len))))
-		return -ENOMEM;
-
 	skb_set_transport_header(skb, nh_ofs + nh_len);
 	key->nw_proto = nexthdr;
 	return nh_len;
@@ -142,7 +148,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
 
 static bool icmp6hdr_ok(struct sk_buff *skb)
 {
-	return skb->len >= skb_transport_offset(skb) + sizeof(struct icmp6hdr);
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmp6hdr));
 }
 
 #define TCP_FLAGS_OFFSET 13
@@ -257,7 +264,7 @@ void flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
 	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
 }
 
-static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 {
 	struct qtag_prefix {
 		__be16 eth_type; /* ETH_P_8021Q */
@@ -265,12 +272,15 @@ static void parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
 	};
 	struct qtag_prefix *qp;
 
-	if (skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))
-		return;
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+					 sizeof(__be16))))
+		return -ENOMEM;
 
 	qp = (struct qtag_prefix *) skb->data;
 	key->dl_tci = qp->tci | htons(VLAN_TAG_PRESENT);
 	__skb_pull(skb, sizeof(struct qtag_prefix));
+
+	return 0;
 }
 
 static __be16 parse_ethertype(struct sk_buff *skb)
@@ -291,9 +301,12 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 	if (ntohs(proto) >= 1536)
 		return proto;
 
-	if (unlikely(skb->len < sizeof(struct llc_snap_hdr)))
+	if (skb->len < sizeof(struct llc_snap_hdr))
 		return htons(ETH_P_802_2);
 
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+		return htons(0);
+
 	llc = (struct llc_snap_hdr *) skb->data;
 	if (llc->dsap != LLC_SAP_SNAP ||
 	    llc->ssap != LLC_SAP_SNAP ||
@@ -410,45 +423,28 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
 	key->in_port = in_port;
 	*is_frag = false;
 
-	/*
-	 * We would really like to pull as many bytes as we could possibly
-	 * want to parse into the linear data area.  Currently, for IPv4,
-	 * that is:
-	 *
-	 *    14     Ethernet header
-	 *     4     VLAN header
-	 *    60     max IP header with options
-	 *    20     max TCP/UDP/ICMP header (don't care about options)
-	 *    --
-	 *    98
-	 *
-	 * But Xen only allocates 64 or 72 bytes for the linear data area in
-	 * netback, which means that we would reallocate and copy the skb's
-	 * linear data on every packet if we did that.  So instead just pull 64
-	 * bytes, which is always sufficient without IP options, and then check
-	 * whether we need to pull more later when we look at the IP header.
-	 */
-	if (!pskb_may_pull(skb, min(skb->len, 64u)))
-		return -ENOMEM;
-
 	skb_reset_mac_header(skb);
 
-	/* Link layer. */
+	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
+	 * header in the linear data area.
+	 */
 	eth = eth_hdr(skb);
 	memcpy(key->dl_src, eth->h_source, ETH_ALEN);
 	memcpy(key->dl_dst, eth->h_dest, ETH_ALEN);
-
-	/* dl_type, dl_vlan, dl_vlan_pcp. */
 	__skb_pull(skb, 2 * ETH_ALEN);
 
 	if (vlan_tx_tag_present(skb))
 		key->dl_tci = htons(vlan_get_tci(skb));
 	else if (eth->h_proto == htons(ETH_P_8021Q))
-		parse_vlan(skb, key);
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
 
 	key->dl_type = parse_ethertype(skb);
+	if (unlikely(key->dl_type == htons(0)))
+		return -ENOMEM;
+
 	skb_reset_network_header(skb);
-	__skb_push(skb, skb->data - (unsigned char *)eth);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/* Network layer. */
 	if (key->dl_type == htons(ETH_P_IP)) {