diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 15cf5c134..423daba61 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -68,6 +68,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); #define NETDEV_RULE_PRIORITY 0x8000 #define NR_THREADS 1 +/* Use per thread recirc_depth to prevent recirculation loop. */ +#define MAX_RECIRC_DEPTH 5 +DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) /* Configuration parameters. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ @@ -1997,8 +2000,9 @@ dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type) } static void -dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, - struct pkt_metadata *md) +dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) { struct dp_netdev_flow *netdev_flow; struct flow key; @@ -2027,6 +2031,17 @@ dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, } } +static void +dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet, + struct pkt_metadata *md) + OVS_REQ_RDLOCK(dp->port_rwlock) +{ + uint32_t *recirc_depth = recirc_depth_get(); + + *recirc_depth = 0; + dp_netdev_input(dp, packet, md); +} + static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, int queue_no, int type, const struct flow *flow, @@ -2096,6 +2111,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, struct dp_netdev_execute_aux *aux = aux_; int type = nl_attr_type(a); struct dp_netdev_port *p; + uint32_t *depth = recirc_depth_get(); switch ((enum ovs_action_attr)type) { case OVS_ACTION_ATTR_OUTPUT: @@ -2122,23 +2138,40 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, break; } - case OVS_ACTION_ATTR_RECIRC: { - const struct ovs_action_recirc *act; + case OVS_ACTION_ATTR_RECIRC: + if (*depth < MAX_RECIRC_DEPTH) { + uint32_t old_recirc_id = md->recirc_id; + uint32_t old_dp_hash = md->dp_hash; + const struct ovs_action_recirc *act; + struct ofpbuf 
*recirc_packet; - act = nl_attr_get(a); - md->recirc_id = act->recirc_id; - md->dp_hash = 0; + recirc_packet = may_steal ? packet : ofpbuf_clone(packet); - if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) { - struct flow flow; + act = nl_attr_get(a); + md->recirc_id = act->recirc_id; + md->dp_hash = 0; - flow_extract(packet, md, &flow); - md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias); + if (act->hash_alg == OVS_RECIRC_HASH_ALG_L4) { + struct flow flow; + + flow_extract(recirc_packet, md, &flow); + md->dp_hash = flow_hash_symmetric_l4(&flow, act->hash_bias); + if (!md->dp_hash) { + md->dp_hash = 1; /* 0 is not valid */ + } + } + + (*depth)++; + dp_netdev_input(aux->dp, recirc_packet, md); + (*depth)--; + + md->recirc_id = old_recirc_id; /* Restore the caller's metadata. */ + md->dp_hash = old_dp_hash; + break; + } else { + VLOG_WARN("Packet dropped. Max recirculation depth exceeded."); } - - dp_netdev_port_input(aux->dp, packet, md); break; - } case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_POP_VLAN: @@ -2150,7 +2183,6 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } - } static void diff --git a/lib/flow.c b/lib/flow.c index f97d7a92e..6c6978d6b 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -370,6 +370,8 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, flow->in_port = md->in_port; flow->skb_priority = md->skb_priority; flow->pkt_mark = md->pkt_mark; + flow->recirc_id = md->recirc_id; + flow->dp_hash = md->dp_hash; } ofpbuf_set_frame(packet, ofpbuf_data(packet)); diff --git a/lib/match.c b/lib/match.c index 2969972e6..514e7f937 100644 --- a/lib/match.c +++ b/lib/match.c @@ -788,6 +788,34 @@ match_hash(const struct match *match, uint32_t basis) return flow_wildcards_hash(&match->wc, flow_hash(&match->flow, basis)); } +static bool +match_has_default_recirc_id(const struct match *m) +{ + return m->flow.recirc_id == 0 && (m->wc.masks.recirc_id == UINT32_MAX || + m->wc.masks.recirc_id == 0); +} + +static bool 
+match_has_default_dp_hash(const struct match *m) +{ + return ((m->flow.dp_hash | m->wc.masks.dp_hash) == 0); +} + +/* Return true if the hidden fields of the match are set to the default values. + * The default values equals to those set up by match_init_hidden_fields(). */ +bool +match_has_default_hidden_fields(const struct match *m) +{ + return match_has_default_recirc_id(m) && match_has_default_dp_hash(m); +} + +void +match_init_hidden_fields(struct match *m) +{ + match_set_recirc_id(m, 0); + match_set_dp_hash_masked(m, 0, 0); +} + static void format_eth_masked(struct ds *s, const char *name, const uint8_t eth[6], const uint8_t mask[6]) diff --git a/lib/match.h b/lib/match.h index 95c8e67e6..2422fb1b5 100644 --- a/lib/match.h +++ b/lib/match.h @@ -134,6 +134,9 @@ void match_set_nd_target_masked(struct match *, const struct in6_addr *, bool match_equal(const struct match *, const struct match *); uint32_t match_hash(const struct match *, uint32_t basis); +void match_init_hidden_fields(struct match *); +bool match_has_default_hidden_fields(const struct match *); + void match_format(const struct match *, struct ds *, unsigned int priority); char *match_to_string(const struct match *, unsigned int priority); void match_print(const struct match *); diff --git a/lib/odp-execute.c b/lib/odp-execute.c index ac0dac0dd..37e44e31a 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * Copyright (c) 2013 Simon Horman * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -209,10 +209,11 @@ odp_execute_actions__(void *dp, struct ofpbuf *packet, bool steal, case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_RECIRC: if (dp_execute_action) { - bool may_steal; /* Allow 'dp_execute_action' to steal the packet data if we do * not need it any more. 
*/ - may_steal = steal && (!more_actions && left <= NLA_ALIGN(a->nla_len)); + bool may_steal = steal && (!more_actions + && left <= NLA_ALIGN(a->nla_len) + && type != OVS_ACTION_ATTR_RECIRC); dp_execute_action(dp, packet, md, a, may_steal); } break; diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index 2ada88d06..d2500427e 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -1326,6 +1326,9 @@ parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string, } else if (fields & F_FLAGS && !strcmp(name, "no_byte_counts")) { fm->flags |= OFPUTIL_FF_NO_BYT_COUNTS; *usable_protocols &= OFPUTIL_P_OF13_UP; + } else if (!strcmp(name, "no_readonly_table") + || !strcmp(name, "allow_hidden_fields")) { + /* ignore these fields. */ } else { char *value; diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 02f3db035..9091b1bf5 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -743,6 +743,12 @@ ofp_print_flow_flags(struct ds *s, enum ofputil_flow_mod_flags flags) if (flags & OFPUTIL_FF_NO_BYT_COUNTS) { ds_put_cstr(s, "no_byte_counts "); } + if (flags & OFPUTIL_FF_HIDDEN_FIELDS) { + ds_put_cstr(s, "allow_hidden_fields "); + } + if (flags & OFPUTIL_FF_NO_READONLY) { + ds_put_cstr(s, "no_readonly_table "); + } } static void diff --git a/lib/ofp-util.h b/lib/ofp-util.h index 298d595ef..245cc4e06 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -246,6 +246,13 @@ enum ofputil_flow_mod_flags { OFPUTIL_FF_CHECK_OVERLAP = 1 << 3, /* All versions. */ OFPUTIL_FF_EMERG = 1 << 4, /* OpenFlow 1.0 only. */ OFPUTIL_FF_RESET_COUNTS = 1 << 5, /* OpenFlow 1.2+. */ + + /* Flags that are only set by OVS for its internal use. Cannot be set via + * OpenFlow. */ + OFPUTIL_FF_HIDDEN_FIELDS = 1 << 6, /* Allow hidden match fields to be + set or modified. */ + OFPUTIL_FF_NO_READONLY = 1 << 7, /* Allow rules within read only tables + to be modified */ }; /* Protocol-independent flow_mod. 
diff --git a/ofproto/bond.c b/ofproto/bond.c index 681233060..49dd49e0b 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -23,6 +23,11 @@ #include #include +#include "ofp-util.h" +#include "ofp-actions.h" +#include "ofpbuf.h" +#include "ofproto/ofproto-provider.h" +#include "ofproto/ofproto-dpif.h" #include "connectivity.h" #include "coverage.h" #include "dynamic-string.h" @@ -36,6 +41,7 @@ #include "packets.h" #include "poll-loop.h" #include "seq.h" +#include "match.h" #include "shash.h" #include "timeval.h" #include "unixctl.h" @@ -50,6 +56,7 @@ static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__; /* Bit-mask for hashing a flow down to a bucket. * There are (BOND_MASK + 1) buckets. */ #define BOND_MASK 0xff +#define RECIRC_RULE_PRIORITY 20 /* Priority level for internal rules */ /* A hash bucket for mapping a flow to a slave. * "struct bond" has an array of (BOND_MASK + 1) of these. */ @@ -57,6 +64,12 @@ struct bond_entry { struct bond_slave *slave; /* Assigned slave, NULL if unassigned. */ uint64_t tx_bytes; /* Count of bytes recently transmitted. */ struct list list_node; /* In bond_slave's 'entries' list. */ + + /* Recirculation. */ + struct rule *pr_rule; /* Post recirculation rule for this entry.*/ + uint64_t pr_tx_bytes; /* Record the rule tx_bytes to figure out + the delta to update the tx_bytes entry + above.*/ }; /* A bond slave, that is, one of the links comprising a bond. */ @@ -68,6 +81,7 @@ struct bond_slave { struct netdev *netdev; /* Network device, owned by the client. */ unsigned int change_seq; /* Tracks changes in 'netdev'. */ + ofp_port_t ofp_port; /* Open flow port number */ char *name; /* Name (a copy of netdev_get_name(netdev)). */ /* Link status. */ @@ -86,6 +100,7 @@ struct bond_slave { struct bond { struct hmap_node hmap_node; /* In 'all_bonds' hmap. */ char *name; /* Name provided by client. */ + struct ofproto_dpif *ofproto; /* The bridge this bond belongs to. */ /* Slaves. 
*/ struct hmap slaves; @@ -111,6 +126,8 @@ struct bond { int rebalance_interval; /* Interval between rebalances, in ms. */ long long int next_rebalance; /* Next rebalancing time. */ bool send_learning_packets; + uint32_t recirc_id; /* Nonzero if recirculation can be used. */ + struct hmap pr_rule_ops; /* Helps to maintain post recirculation rules. */ /* Legacy compatibility. */ long long int next_fake_iface_update; /* LLONG_MAX if disabled. */ @@ -119,6 +136,21 @@ struct bond { struct ovs_refcount ref_cnt; }; +/* What to do with a bond_pr_rule_op. */ +enum bond_op { + ADD, /* Add the rule to ofproto's flow table. */ + DEL, /* Delete the rule from the ofproto's flow table. */ +}; + +/* A rule to add to or delete from ofproto's internal flow table. */ +struct bond_pr_rule_op { + struct hmap_node hmap_node; + struct match match; + ofp_port_t out_ofport; + enum bond_op op; + struct rule *pr_rule; +}; + static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock); static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_) OVS_REQ_RDLOCK(rwlock); @@ -185,17 +217,21 @@ bond_mode_to_string(enum bond_mode balance) { * The caller should register each slave on the new bond by calling * bond_slave_register(). 
*/ struct bond * -bond_create(const struct bond_settings *s) +bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto) { struct bond *bond; bond = xzalloc(sizeof *bond); + bond->ofproto = ofproto; hmap_init(&bond->slaves); list_init(&bond->enabled_slaves); ovs_mutex_init(&bond->mutex); bond->next_fake_iface_update = LLONG_MAX; ovs_refcount_init(&bond->ref_cnt); + bond->recirc_id = 0; + hmap_init(&bond->pr_rule_ops); + bond_reconfigure(bond, s); return bond; } @@ -216,6 +252,7 @@ void bond_unref(struct bond *bond) { struct bond_slave *slave, *next_slave; + struct bond_pr_rule_op *pr_op, *next_op; if (!bond || ovs_refcount_unref(&bond->ref_cnt) != 1) { return; @@ -236,9 +273,137 @@ bond_unref(struct bond *bond) ovs_mutex_destroy(&bond->mutex); free(bond->hash); free(bond->name); + + HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { + hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node); + free(pr_op); + } + hmap_destroy(&bond->pr_rule_ops); + + if (bond->recirc_id) { + ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id); + } + free(bond); } +static void +add_pr_rule(struct bond *bond, const struct match *match, + ofp_port_t out_ofport, struct rule *rule) +{ + uint32_t hash = match_hash(match, 0); + struct bond_pr_rule_op *pr_op; + + HMAP_FOR_EACH_WITH_HASH(pr_op, hmap_node, hash, &bond->pr_rule_ops) { + if (match_equal(&pr_op->match, match)) { + pr_op->op = ADD; + pr_op->out_ofport = out_ofport; + pr_op->pr_rule = rule; + return; + } + } + + pr_op = xmalloc(sizeof *pr_op); + pr_op->match = *match; + pr_op->op = ADD; + pr_op->out_ofport = out_ofport; + pr_op->pr_rule = rule; + hmap_insert(&bond->pr_rule_ops, &pr_op->hmap_node, hash); +} + +/* Brings the post recirculation rules kept in 'bond->pr_rule_ops' in line + * with 'bond''s current bucket-to-slave assignment: one rule is installed per + * assigned bucket and rules that are no longer wanted are deleted. */ +static void +update_recirc_rules(struct bond *bond) +{ + struct match match; + struct bond_pr_rule_op *pr_op, *next_op; + uint64_t ofpacts_stub[128 / 8]; + struct ofpbuf ofpacts; + int i; + + ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + + HMAP_FOR_EACH(pr_op, hmap_node, 
&bond->pr_rule_ops) { + pr_op->op = DEL; + } + + /* Rules are wanted only while the bond has buckets and a recirc_id; + * otherwise every existing rule stays marked DEL and is removed below. */ + if (bond->hash && bond->recirc_id) { + for (i = 0; i < BOND_MASK + 1; i++) { + struct bond_slave *slave = bond->hash[i].slave; + + if (slave) { + match_init_catchall(&match); + match_set_recirc_id(&match, bond->recirc_id); + /* recirc_id -> metadata to speed up look ups. */ + match_set_metadata(&match, htonll(bond->recirc_id)); + match_set_dp_hash_masked(&match, i, BOND_MASK); + + add_pr_rule(bond, &match, slave->ofp_port, + bond->hash[i].pr_rule); + } + } + } + + HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { + /* Bucket that this rule serves, recovered from the dp_hash value put + * into the match above (presumably kept in flow.dp_hash -- confirm). */ + uint32_t bucket = pr_op->match.flow.dp_hash & BOND_MASK; + struct rule *rule; + int error; + + switch (pr_op->op) { + case ADD: + ofpbuf_clear(&ofpacts); + ofpact_put_OUTPUT(&ofpacts)->port = pr_op->out_ofport; + error = ofproto_dpif_add_internal_flow(bond->ofproto, + &pr_op->match, + RECIRC_RULE_PRIORITY, + &ofpacts, &rule); + if (error) { + char *err_s = match_to_string(&pr_op->match, + RECIRC_RULE_PRIORITY); + + VLOG_ERR("failed to add post recirculation flow %s", err_s); + free(err_s); + rule = NULL; + } + /* Record the outcome in the bucket as well as in the op, because + * bond_recirculation_account() finds rules through the bucket. */ + pr_op->pr_rule = rule; + bond->hash[bucket].pr_rule = rule; + break; + + case DEL: + /* Drop the bucket's reference first so that it never points at a + * rule that has already been removed. */ + if (bond->hash && bond->hash[bucket].pr_rule == pr_op->pr_rule) { + bond->hash[bucket].pr_rule = NULL; + } + error = ofproto_dpif_delete_internal_flow(bond->ofproto, + &pr_op->match, + RECIRC_RULE_PRIORITY); + if (error) { + char *err_s = match_to_string(&pr_op->match, + RECIRC_RULE_PRIORITY); + + VLOG_ERR("failed to remove post recirculation flow %s", err_s); + free(err_s); + } + + hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node); + free(pr_op); + break; + } + } + + ofpbuf_uninit(&ofpacts); +} + + /* Updates 'bond''s overall configuration to 's'. 
* * The caller should register each slave on 'bond' by calling @@ -299,6 +451,15 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) bond->bond_revalidate = false; } + if (bond->balance != BM_AB) { + if (!bond->recirc_id) { + bond->recirc_id = ofproto_dpif_alloc_recirc_id(bond->ofproto); + } + } else if (bond->recirc_id) { + ofproto_dpif_free_recirc_id(bond->ofproto, bond->recirc_id); + bond->recirc_id = 0; + } + if (bond->balance == BM_AB || !bond->hash || revalidate) { bond_entry_reset(bond); } @@ -327,7 +488,8 @@ bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev) * 'slave_' or destroying 'bond'. */ void -bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev) +bond_slave_register(struct bond *bond, void *slave_, + ofp_port_t ofport, struct netdev *netdev) { struct bond_slave *slave; @@ -339,6 +501,7 @@ bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev) hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0)); slave->bond = bond; slave->aux = slave_; + slave->ofp_port = ofport; slave->delay_expires = LLONG_MAX; slave->name = xstrdup(netdev_get_name(netdev)); bond->bond_revalidate = true; @@ -688,6 +851,84 @@ bond_choose_output_slave(struct bond *bond, const struct flow *flow, return aux; } +/* Recirculation. 
*/ +static void +bond_entry_account(struct bond_entry *entry, uint64_t rule_tx_bytes) + OVS_REQ_WRLOCK(rwlock) +{ + if (entry->slave) { + uint64_t delta; + + delta = rule_tx_bytes - entry->pr_tx_bytes; + entry->tx_bytes += delta; + entry->pr_tx_bytes = rule_tx_bytes; + } +} + +/* Maintain bond stats using post recirculation rule byte counters. */ +void +bond_recirculation_account(struct bond *bond) +{ + int i; + + ovs_rwlock_wrlock(&rwlock); /* Bucket byte counts are updated below. */ + for (i = 0; i <= BOND_MASK; i++) { + struct bond_entry *entry = &bond->hash[i]; + struct rule *rule = entry->pr_rule; + + if (rule) { + uint64_t n_packets OVS_UNUSED; + long long int used OVS_UNUSED; + uint64_t n_bytes; + + rule->ofproto->ofproto_class->rule_get_stats( + rule, &n_packets, &n_bytes, &used); + bond_entry_account(entry, n_bytes); + } + } + ovs_rwlock_unlock(&rwlock); +} + +bool +bond_may_recirc(const struct bond *bond, uint32_t *recirc_id, + uint32_t *hash_bias) +{ + if (bond->balance == BM_TCP && bond->recirc_id) { /* 0: no id allocated. */ + if (recirc_id) { + *recirc_id = bond->recirc_id; + } + if (hash_bias) { + *hash_bias = bond->basis; + } + return true; + } else { + return false; + } +} + +void +bond_update_post_recirc_rules(struct bond *bond, const bool force) +{ + struct bond_entry *e; + bool update_rules = force; /* Always update rules if caller forces it. */ + + /* Make sure all bond entries are populated */ + for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) { + if (!e->slave || !e->slave->enabled) { + update_rules = true; + e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves), + struct bond_slave, hmap_node); + if (!e->slave->enabled) { + e->slave = bond->active_slave; + } + } + } + + if (update_rules) { + update_recirc_rules(bond); + } +} + /* Rebalancing. */ static bool @@ -845,19 +1086,22 @@ reinsert_bal(struct list *bals, struct bond_slave *slave) /* If 'bond' needs rebalancing, does so. * - * The caller should have called bond_account() for each active flow, to ensure - * that flow data is consistently accounted at this point. 
*/ -void + * The caller should have called bond_account() for each active flow, or in case + * of recirculation is used, have called bond_recirculation_account(bond), + * to ensure that flow data is consistently accounted at this point. + * + * Return whether rebalancing took place.*/ +bool bond_rebalance(struct bond *bond) { struct bond_slave *slave; struct bond_entry *e; struct list bals; + bool rebalanced = false; ovs_rwlock_wrlock(&rwlock); if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) { - ovs_rwlock_unlock(&rwlock); - return; + goto done; } bond->next_rebalance = time_msec() + bond->rebalance_interval; @@ -916,6 +1160,7 @@ bond_rebalance(struct bond *bond) /* Re-sort 'bals'. */ reinsert_bal(&bals, from); reinsert_bal(&bals, to); + rebalanced = true; } else { /* Can't usefully migrate anything away from 'from'. * Don't reconsider it. */ @@ -932,7 +1177,10 @@ bond_rebalance(struct bond *bond) e->slave = NULL; } } + +done: ovs_rwlock_unlock(&rwlock); + return rebalanced; } /* Bonding unixctl user interface functions. 
*/ @@ -972,15 +1220,15 @@ bond_unixctl_list(struct unixctl_conn *conn, struct ds ds = DS_EMPTY_INITIALIZER; const struct bond *bond; - ds_put_cstr(&ds, "bond\ttype\tslaves\n"); + ds_put_cstr(&ds, "bond\ttype\trecircID\tslaves\n"); ovs_rwlock_rdlock(&rwlock); HMAP_FOR_EACH (bond, hmap_node, all_bonds) { const struct bond_slave *slave; size_t i; - ds_put_format(&ds, "%s\t%s\t", - bond->name, bond_mode_to_string(bond->balance)); + ds_put_format(&ds, "%s\t%s\t%"PRIu32"\t", bond->name, + bond_mode_to_string(bond->balance), bond->recirc_id); i = 0; HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { @@ -1003,12 +1251,18 @@ bond_print_details(struct ds *ds, const struct bond *bond) struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); const struct shash_node **sorted_slaves = NULL; const struct bond_slave *slave; + bool may_recirc; + uint32_t recirc_id; int i; ds_put_format(ds, "---- %s ----\n", bond->name); ds_put_format(ds, "bond_mode: %s\n", bond_mode_to_string(bond->balance)); + may_recirc = bond_may_recirc(bond, &recirc_id, NULL); + ds_put_format(ds, "bond may use recirculation: %s, Recirc-ID : %d\n", + may_recirc ? "yes" : "no", may_recirc ? (int) recirc_id : -1); + ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis); ds_put_format(ds, "updelay: %d ms\n", bond->updelay); diff --git a/ofproto/bond.h b/ofproto/bond.h index 5b3814e85..e5ceb45a7 100644 --- a/ofproto/bond.h +++ b/ofproto/bond.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,12 +19,13 @@ #include #include - +#include "ofproto-provider.h" #include "packets.h" struct flow; struct netdev; struct ofpbuf; +struct ofproto_dpif; enum lacp_status; /* How flows are balanced among bond slaves. */ @@ -60,12 +61,13 @@ struct bond_settings { void bond_init(void); /* Basics. 
*/ -struct bond *bond_create(const struct bond_settings *); +struct bond *bond_create(const struct bond_settings *, + struct ofproto_dpif *ofproto); void bond_unref(struct bond *); struct bond *bond_ref(const struct bond *); bool bond_reconfigure(struct bond *, const struct bond_settings *); -void bond_slave_register(struct bond *, void *slave_, struct netdev *); +void bond_slave_register(struct bond *, void *slave_, ofp_port_t ofport, struct netdev *); void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *); void bond_slave_unregister(struct bond *, const void *slave); @@ -94,6 +96,27 @@ void *bond_choose_output_slave(struct bond *, const struct flow *, /* Rebalancing. */ void bond_account(struct bond *, const struct flow *, uint16_t vlan, uint64_t n_bytes); -void bond_rebalance(struct bond *); +bool bond_rebalance(struct bond *); +/* Recirculation + * + * Only balance_tcp mode uses recirculation. + * + * When recirculation is used, each bond port is assigned with a unique + * recirc_id. The output action to the bond port will be replaced by + * a RECIRC action. + * + * ... actions= ... RECIRC(L4_HASH, recirc_id) .... + * + * On handling first output packet, 256 post recirculation flows are installed: + * + * recirc_id=, dp_hash=<[0..255]>/0xff, actions: output + * + * Bond module pulls stats from those post recirculation rules. If rebalancing + * is needed, those rules are updated with new output actions. 
+*/ +void bond_update_post_recirc_rules(struct bond *, const bool force); +bool bond_may_recirc(const struct bond *, uint32_t *recirc_id, + uint32_t *hash_bias); +void bond_recirculation_account(struct bond *); #endif /* bond.h */ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index b8e808480..c3040d729 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -58,6 +58,8 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate); /* Maximum depth of flow table recursion (due to resubmit actions) in a * flow translation. */ #define MAX_RESUBMIT_RECURSION 64 +#define MAX_INTERNAL_RESUBMITS 1 /* Max resubmits allowed using rules in + internal table. */ /* Maximum number of resubmit actions in a flow translation, whether they are * recursive or not. */ @@ -89,6 +91,9 @@ struct xbridge { bool has_in_band; /* Bridge has in band control? */ bool forward_bpdu; /* Bridge forwards STP BPDUs? */ + /* True if the datapath supports recirculation. */ + bool enable_recirc; + /* True if the datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. * False if the datapath supports only 8-byte (or shorter) userdata. 
*/ @@ -226,8 +231,8 @@ static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len, struct xlate_ctx *); static void xlate_actions__(struct xlate_in *, struct xlate_out *) OVS_REQ_RDLOCK(xlate_rwlock); - static void xlate_normal(struct xlate_ctx *); - static void xlate_report(struct xlate_ctx *, const char *); +static void xlate_normal(struct xlate_ctx *); +static void xlate_report(struct xlate_ctx *, const char *); static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port, uint8_t table_id, bool may_packet_in, bool honor_table_miss); @@ -257,6 +262,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, const struct dpif_ipfix *ipfix, const struct netflow *netflow, enum ofp_config_flags frag, bool forward_bpdu, bool has_in_band, + bool enable_recirc, bool variable_length_userdata, size_t max_mpls_depth) { @@ -310,6 +316,7 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, xbridge->frag = frag; xbridge->miss_rule = miss_rule; xbridge->no_packet_in_rule = no_packet_in_rule; + xbridge->enable_recirc = enable_recirc; xbridge->variable_length_userdata = variable_length_userdata; xbridge->max_mpls_depth = max_mpls_depth; } @@ -1131,10 +1138,23 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, /* Partially configured bundle with no slaves. Drop the packet. */ return; } else if (!out_xbundle->bond) { + ctx->xout->use_recirc = false; xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport, bundle_node); } else { struct ofport_dpif *ofport; + struct xlate_recirc *xr = &ctx->xout->recirc; + + if (ctx->xbridge->enable_recirc) { + ctx->xout->use_recirc = bond_may_recirc( + out_xbundle->bond, &xr->recirc_id, &xr->hash_bias); + + if (ctx->xout->use_recirc) { + /* Only TCP mode uses recirculation. 
*/ + xr->hash_alg = OVS_RECIRC_HASH_ALG_L4; + bond_update_post_recirc_rules(out_xbundle->bond, false); + } + } ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow, &ctx->xout->wc, vid); @@ -1817,8 +1837,20 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, &ctx->xout->odp_actions, &ctx->xout->wc); - nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, - out_port); + + if (ctx->xout->use_recirc) { + struct ovs_action_recirc *act_recirc; + struct xlate_recirc *xr = &ctx->xout->recirc; + + act_recirc = nl_msg_put_unspec_uninit(&ctx->xout->odp_actions, + OVS_ACTION_ATTR_RECIRC, sizeof *act_recirc); + act_recirc->recirc_id = xr->recirc_id; + act_recirc->hash_alg = xr->hash_alg; + act_recirc->hash_bias = xr->hash_bias; + } else { + nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, + out_port); + } ctx->sflow_odp_port = odp_port; ctx->sflow_n_outputs++; @@ -1862,10 +1894,10 @@ xlate_resubmit_resource_check(struct xlate_ctx *ctx) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - if (ctx->recurse >= MAX_RESUBMIT_RECURSION) { + if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) { VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times", MAX_RESUBMIT_RECURSION); - } else if (ctx->resubmits >= MAX_RESUBMITS) { + } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) { VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS); } else if (ofpbuf_size(&ctx->xout->odp_actions) > UINT16_MAX) { VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions"); @@ -2086,6 +2118,15 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx, { ofp_port_t in_port; uint8_t table_id; + bool may_packet_in = false; + bool honor_table_miss = false; + + if (ctx->rule && rule_dpif_is_internal(ctx->rule)) { + /* Still allow missed packets to be sent to the controller + * if resubmitting from an internal table. 
*/ + may_packet_in = true; + honor_table_miss = true; + } in_port = resubmit->in_port; if (in_port == OFPP_IN_PORT) { @@ -2097,7 +2138,8 @@ xlate_ofpact_resubmit(struct xlate_ctx *ctx, table_id = ctx->table_id; } - xlate_table_action(ctx, in_port, table_id, false, false); + xlate_table_action(ctx, in_port, table_id, may_packet_in, + honor_table_miss); } static void @@ -3069,6 +3111,7 @@ xlate_actions__(struct xlate_in *xin, struct xlate_out *xout) ctx.rule = rule; } xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule); + xout->use_recirc = false; if (xin->ofpacts) { ofpacts = xin->ofpacts; diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 8b01d4e46..8b53e10d3 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -32,6 +32,12 @@ struct dpif_ipfix; struct dpif_sflow; struct mac_learning; +struct xlate_recirc { + uint32_t recirc_id; /* !0 Use recirculation instead of output. */ + uint8_t hash_alg; /* !0 Compute hash for recirc before. */ + uint32_t hash_bias; /* Compute hash for recirc before. */ +}; + struct xlate_out { /* Wildcards relevant in translation. Any fields that were used to * calculate the action must be set for caching and kernel @@ -50,6 +56,9 @@ struct xlate_out { ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */ mirror_mask_t mirrors; /* Bitmap of associated mirrors. */ + bool use_recirc; /* Should generate recirc? 
*/ + struct xlate_recirc recirc; /* Information used for generating + * recirculation actions */ uint64_t odp_actions_stub[256 / 8]; struct ofpbuf odp_actions; }; @@ -129,7 +138,8 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, const struct mbridge *, const struct dpif_sflow *, const struct dpif_ipfix *, const struct netflow *, enum ofp_config_flags, bool forward_bpdu, - bool has_in_band, bool variable_length_userdata, + bool has_in_band, bool enable_recirc, + bool variable_length_userdata, size_t mpls_label_stack_length) OVS_REQ_WRLOCK(xlate_rwlock); void xlate_remove_ofproto(struct ofproto_dpif *) OVS_REQ_WRLOCK(xlate_rwlock); @@ -161,8 +171,8 @@ int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet, void xlate_actions(struct xlate_in *, struct xlate_out *) OVS_EXCLUDED(xlate_rwlock); void xlate_in_init(struct xlate_in *, struct ofproto_dpif *, - const struct flow *, struct rule_dpif *, uint16_t tcp_flags, - const struct ofpbuf *packet); + const struct flow *, struct rule_dpif *, + uint16_t tcp_flags, const struct ofpbuf *packet); void xlate_out_uninit(struct xlate_out *); void xlate_actions_for_side_effects(struct xlate_in *); void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index f72d53e18..cb0151601 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -253,7 +253,9 @@ struct dpif_backer { bool recv_set_enable; /* Enables or disables receiving packets. */ + /* Recirculation. */ struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ + bool enable_recirc; /* True if the datapath supports recirculation */ /* True if the datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. 
@@ -332,9 +334,15 @@ ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto) return ofproto->backer->max_mpls_depth; } +bool +ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto) +{ + return ofproto->backer->enable_recirc; +} + static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port); -static void ofproto_trace(struct ofproto_dpif *, const struct flow *, +static void ofproto_trace(struct ofproto_dpif *, struct flow *, const struct ofpbuf *packet, const struct ofpact[], size_t ofpacts_len, struct ds *); @@ -583,6 +591,7 @@ type_run(const char *type) ofproto->netflow, ofproto->up.frag_handling, ofproto->up.forward_bpdu, connmgr_has_in_band(ofproto->up.connmgr), + ofproto->backer->enable_recirc, ofproto->backer->variable_length_userdata, ofproto->backer->max_mpls_depth); @@ -808,6 +817,7 @@ struct odp_garbage { static bool check_variable_length_userdata(struct dpif_backer *backer); static size_t check_max_mpls_depth(struct dpif_backer *backer); +static bool check_recirc(struct dpif_backer *backer); static int open_dpif_backer(const char *type, struct dpif_backer **backerp) @@ -908,6 +918,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) close_dpif_backer(backer); return error; } + backer->enable_recirc = check_recirc(backer); backer->variable_length_userdata = check_variable_length_userdata(backer); backer->max_mpls_depth = check_max_mpls_depth(backer); backer->rid_pool = recirc_id_pool_create(); @@ -919,6 +930,59 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) return error; } +/* Tests whether 'backer''s datapath supports recirculation Only newer datapath + * supports OVS_KEY_ATTR in OVS_ACTION_ATTR_USERSPACE actions. We need to + * disable some features on older datapaths that don't support this feature. 
+ * + * Returns false if 'backer' definitely does not support recirculation, true if + * it seems to support recirculation or if at least the error we get is + * ambiguous. */ +static bool +check_recirc(struct dpif_backer *backer) +{ + struct flow flow; + struct odputil_keybuf keybuf; + struct ofpbuf key; + int error; + bool enable_recirc = false; + + memset(&flow, 0, sizeof flow); + flow.recirc_id = 1; + flow.dp_hash = 1; + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow, 0); + + error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, + key.data, key.size, NULL, 0, NULL, 0, NULL); + if (error && error != EEXIST) { + if (error != EINVAL) { + VLOG_WARN("%s: Reciculation flow probe failed (%s)", + dpif_name(backer->dpif), ovs_strerror(error)); + } + goto done; + } + + error = dpif_flow_del(backer->dpif, key.data, key.size, NULL); + if (error) { + VLOG_WARN("%s: failed to delete recirculation feature probe flow", + dpif_name(backer->dpif)); + } + + enable_recirc = true; + +done: + if (enable_recirc) { + VLOG_INFO("%s: Datapath supports recirculation", + dpif_name(backer->dpif)); + } else { + VLOG_INFO("%s: Datapath does not support recirculation", + dpif_name(backer->dpif)); + } + + return enable_recirc; +} + /* Tests whether 'backer''s datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. 
We need * to disable some features on older datapaths that don't support this @@ -1102,51 +1166,27 @@ construct(struct ofproto *ofproto_) ofproto_init_tables(ofproto_, N_TABLES); error = add_internal_flows(ofproto); + ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY; return error; } static int -add_internal_flow(struct ofproto_dpif *ofproto, int id, +add_internal_miss_flow(struct ofproto_dpif *ofproto, int id, const struct ofpbuf *ofpacts, struct rule_dpif **rulep) { - struct ofputil_flow_mod fm; - struct classifier *cls; + struct match match; int error; + struct rule *rule; - match_init_catchall(&fm.match); - fm.priority = 0; - match_set_reg(&fm.match, 0, id); - fm.new_cookie = htonll(0); - fm.cookie = htonll(0); - fm.cookie_mask = htonll(0); - fm.modify_cookie = false; - fm.table_id = TBL_INTERNAL; - fm.command = OFPFC_ADD; - fm.idle_timeout = 0; - fm.hard_timeout = 0; - fm.buffer_id = 0; - fm.out_port = 0; - fm.flags = 0; - fm.ofpacts = ofpbuf_data(ofpacts); - fm.ofpacts_len = ofpbuf_size(ofpacts); + match_init_catchall(&match); + match_set_reg(&match, 0, id); - error = ofproto_flow_mod(&ofproto->up, &fm); - if (error) { - VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)", - id, ofperr_to_string(error)); - return error; - } + error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, ofpacts, &rule); + *rulep = error ? 
NULL : rule_dpif_cast(rule); - cls = &ofproto->up.tables[TBL_INTERNAL].cls; - fat_rwlock_rdlock(&cls->rwlock); - *rulep = rule_dpif_cast(rule_from_cls_rule( - classifier_lookup(cls, &fm.match.flow, NULL))); - ovs_assert(*rulep != NULL); - fat_rwlock_unlock(&cls->rwlock); - - return 0; + return error; } static int @@ -1155,6 +1195,9 @@ add_internal_flows(struct ofproto_dpif *ofproto) struct ofpact_controller *controller; uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; + struct rule *unused_rulep OVS_UNUSED; + struct ofpact_resubmit *resubmit; + struct match match; int error; int id; @@ -1167,20 +1210,53 @@ add_internal_flows(struct ofproto_dpif *ofproto) controller->reason = OFPR_NO_MATCH; ofpact_pad(&ofpacts); - error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule); + error = add_internal_miss_flow(ofproto, id++, &ofpacts, + &ofproto->miss_rule); if (error) { return error; } ofpbuf_clear(&ofpacts); - error = add_internal_flow(ofproto, id++, &ofpacts, + error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->no_packet_in_rule); if (error) { return error; } - error = add_internal_flow(ofproto, id++, &ofpacts, + error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->drop_frags_rule); + if (error) { + return error; + } + + /* Continue non-recirculation rule lookups from table 0. + * + * (priority=2), recirc=0, actions=resubmit(, 0) + */ + resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->ofpact.compat = 0; + resubmit->in_port = OFPP_IN_PORT; + resubmit->table_id = 0; + + match_init_catchall(&match); + match_set_recirc_id(&match, 0); + + error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, &ofpacts, + &unused_rulep); + if (error) { + return error; + } + + /* Drop any run away recirc rule lookups. Recirc_id has to be + * non-zero when reaching this rule. 
+ * + * (priority=1), *, actions=drop + */ + ofpbuf_clear(&ofpacts); + match_init_catchall(&match); + error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, &ofpacts, + &unused_rulep); + return error; } @@ -1248,6 +1324,7 @@ run(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); uint64_t new_seq, new_dump_seq; + const bool enable_recirc = ofproto_dpif_get_enable_recirc(ofproto); if (mbridge_need_revalidate(ofproto->mbridge)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; @@ -1325,12 +1402,17 @@ run(struct ofproto *ofproto_) /* All outstanding data in existing flows has been accounted, so it's a * good time to do bond rebalancing. */ - if (ofproto->has_bonded_bundles) { + if (enable_recirc && ofproto->has_bonded_bundles) { struct ofbundle *bundle; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { - if (bundle->bond) { - bond_rebalance(bundle->bond); + struct bond *bond = bundle->bond; + + if (bond && bond_may_recirc(bond, NULL, NULL)) { + bond_recirculation_account(bond); + if (bond_rebalance(bundle->bond)) { + bond_update_post_recirc_rules(bond, true); + } } } } @@ -2348,12 +2430,13 @@ bundle_set(struct ofproto *ofproto_, void *aux, ofproto->backer->need_revalidate = REV_RECONFIGURE; } } else { - bundle->bond = bond_create(s->bond); + bundle->bond = bond_create(s->bond, ofproto); ofproto->backer->need_revalidate = REV_RECONFIGURE; } LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bond_slave_register(bundle->bond, port, port->up.netdev); + bond_slave_register(bundle->bond, port, + port->up.ofp_port, port->up.netdev); } } else { bond_unref(bundle->bond); @@ -3003,6 +3086,7 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, ovs_assert((rule != NULL) != (ofpacts != NULL)); dpif_flow_stats_extract(flow, packet, time_msec(), &stats); + if (rule) { rule_dpif_credit_stats(rule, &stats); } @@ -3085,20 +3169,13 @@ rule_dpif_get_actions(const struct rule_dpif *rule) return rule_get_actions(&rule->up); } -/* 
Lookup 'flow' in table 0 of 'ofproto''s classifier. - * If 'wc' is non-null, sets the fields that were relevant as part of - * the lookup. Returns the table_id where a match or miss occurred. - * - * The return value will be zero unless there was a miss and - * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables - * where misses occur. */ -uint8_t -rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, - struct flow_wildcards *wc, struct rule_dpif **rule) +static uint8_t +rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, + struct flow_wildcards *wc, struct rule_dpif **rule) { enum rule_dpif_lookup_verdict verdict; enum ofputil_port_config config = 0; - uint8_t table_id = 0; + uint8_t table_id = TBL_INTERNAL; verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, &table_id, rule); @@ -3134,6 +3211,23 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, return table_id; } +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. + * If 'wc' is non-null, sets the fields that were relevant as part of + * the lookup. Returns the table_id where a match or miss occurred. + * + * The return value will be zero unless there was a miss and + * OFPTC_TABLE_MISS_CONTINUE is in effect for the sequence of tables + * where misses occur. */ +uint8_t +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, + struct flow_wildcards *wc, struct rule_dpif **rule) +{ + /* Set metadata to the value of recirc_id to speed up internal + * rule lookup. */ + flow->metadata = htonll(flow->recirc_id); + return rule_dpif_lookup__(ofproto, flow, wc, rule); +} + static struct rule_dpif * rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, const struct flow *flow, struct flow_wildcards *wc) @@ -4058,7 +4152,7 @@ exit: * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to * trace, otherwise the actions are determined by a flow table lookup.
*/ static void -ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, +ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow, const struct ofpbuf *packet, const struct ofpact ofpacts[], size_t ofpacts_len, struct ds *ds) @@ -4410,7 +4504,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid) if (realdev_ofp_port && ofport->bundle) { /* vlandevs are enslaved to their realdevs, so they are not allowed to * themselves be part of a bundle. */ - bundle_set(ofport->up.ofproto, ofport->bundle, NULL); + bundle_set(ofport_->ofproto, ofport->bundle, NULL); } ofport->realdev_ofp_port = realdev_ofp_port; @@ -4661,6 +4755,78 @@ ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id) recirc_id_free(backer->rid_pool, recirc_id); } +int +ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, + struct match *match, int priority, + const struct ofpbuf *ofpacts, + struct rule **rulep) +{ + struct ofputil_flow_mod fm; + struct rule_dpif *rule; + int error; + + fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.command = OFPFC_ADD; + fm.idle_timeout = 0; + fm.hard_timeout = 0; + fm.buffer_id = 0; + fm.out_port = 0; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.ofpacts = ofpacts->data; + fm.ofpacts_len = ofpacts->size; + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to add internal flow (%s)", + ofperr_to_string(error)); + *rulep = NULL; + return error; + } + + rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &match->flow, + &match->wc); + if (rule) { + rule_dpif_unref(rule); + *rulep = &rule->up; + } else { + OVS_NOT_REACHED(); + } + return 0; +} + +int +ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto, + struct match *match, int priority) +{ + struct ofputil_flow_mod fm; + int error; + + 
fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.command = OFPFC_DELETE_STRICT; + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)", + ofperr_to_string(error)); + return error; + } + + return 0; +} + const struct ofproto_class ofproto_dpif_class = { init, enumerate_types, diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index ae6f9b75c..ed0aa90a8 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -21,6 +21,7 @@ #include "odp-util.h" #include "ofp-util.h" #include "ovs-thread.h" +#include "ofproto-provider.h" #include "timer.h" #include "util.h" #include "ovs-thread.h" @@ -83,9 +84,10 @@ extern struct ovs_rwlock xlate_rwlock; * actions into datapath actions. */ size_t ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *); +bool ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *); -uint8_t rule_dpif_lookup(struct ofproto_dpif *, const struct flow *, - struct flow_wildcards *, struct rule_dpif **rule); +uint8_t rule_dpif_lookup(struct ofproto_dpif *, struct flow *, + struct flow_wildcards *, struct rule_dpif **rule); enum rule_dpif_lookup_verdict rule_dpif_lookup_from_table(struct ofproto_dpif *, const struct flow *, @@ -103,6 +105,7 @@ void rule_dpif_credit_stats(struct rule_dpif *rule , bool rule_dpif_is_fail_open(const struct rule_dpif *); bool rule_dpif_is_table_miss(const struct rule_dpif *); bool rule_dpif_is_internal(const struct rule_dpif *); +uint8_t rule_dpif_get_table(const struct rule_dpif *); struct rule_actions *rule_dpif_get_actions(const struct rule_dpif *); @@ -207,4 +210,11 @@ struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t); uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto); void 
ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id); +int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, + struct match *, int priority, + const struct ofpbuf *ofpacts, + struct rule **rulep); +int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *, + int priority); + #endif /* ofproto-dpif.h */ diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 9f37f7170..bfa023522 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -205,7 +205,8 @@ void ofproto_port_set_state(struct ofport *, enum ofputil_port_state); */ enum oftable_flags { OFTABLE_HIDDEN = 1 << 0, /* Hide from most OpenFlow operations. */ - OFTABLE_READONLY = 1 << 1 /* Don't allow OpenFlow to change this table. */ + OFTABLE_READONLY = 1 << 1 /* Don't allow OpenFlow controller to change + this table. */ }; /* A flow table within a "struct ofproto". diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 677da8c92..a517264f5 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -261,7 +261,8 @@ struct ofport_usage { /* rule. */ static void ofproto_rule_destroy__(struct rule *); static void ofproto_rule_send_removed(struct rule *, uint8_t reason); -static bool rule_is_modifiable(const struct rule *); +static bool rule_is_modifiable(const struct rule *rule, + enum ofputil_flow_mod_flags flag); /* OpenFlow. */ static enum ofperr add_flow(struct ofproto *, struct ofconn *, @@ -1143,6 +1144,24 @@ ofproto_get_n_tables(const struct ofproto *ofproto) return ofproto->n_tables; } +/* Returns the number of Controller visible OpenFlow tables + * in 'ofproto'. This number will exclude Hidden tables. + * This function's return value should be less than or equal to that of + * ofproto_get_n_tables(). */ +uint8_t +ofproto_get_n_visible_tables(const struct ofproto *ofproto) +{ + uint8_t n = ofproto->n_tables; + + /* Count only non-hidden tables in the number of tables. (Hidden tables, + * if present, are always at the end.)
*/ + while(n && (ofproto->tables[n - 1].flags & OFTABLE_HIDDEN)) { + n--; + } + + return n; +} + /* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the * settings from 's'. 'table_id' must be in the range 0 through the number of * OpenFlow tables in 'ofproto' minus 1, inclusive. @@ -2741,19 +2760,27 @@ destroy_rule_executes(struct ofproto *ofproto) static bool ofproto_rule_is_hidden(const struct rule *rule) { - return rule->cr.priority > UINT16_MAX; -} - -static enum oftable_flags -rule_get_flags(const struct rule *rule) -{ - return rule->ofproto->tables[rule->table_id].flags; + return (rule->cr.priority > UINT16_MAX); } static bool -rule_is_modifiable(const struct rule *rule) +oftable_is_modifiable(const struct oftable *table, + enum ofputil_flow_mod_flags flags) { - return !(rule_get_flags(rule) & OFTABLE_READONLY); + if (flags & OFPUTIL_FF_NO_READONLY) { + return true; + } + + return !(table->flags & OFTABLE_READONLY); +} + +static bool +rule_is_modifiable(const struct rule *rule, enum ofputil_flow_mod_flags flags) +{ + const struct oftable *rule_table; + + rule_table = &rule->ofproto->tables[rule->table_id]; + return oftable_is_modifiable(rule_table, flags); } static enum ofperr @@ -2771,26 +2798,14 @@ handle_features_request(struct ofconn *ofconn, const struct ofp_header *oh) struct ofport *port; bool arp_match_ip; struct ofpbuf *b; - int n_tables; - int i; ofproto->ofproto_class->get_features(ofproto, &arp_match_ip, &features.actions); ovs_assert(features.actions & OFPUTIL_A_OUTPUT); /* sanity check */ - /* Count only non-hidden tables in the number of tables. (Hidden tables, - * if present, are always at the end.) 
*/ - n_tables = ofproto->n_tables; - for (i = 0; i < ofproto->n_tables; i++) { - if (ofproto->tables[i].flags & OFTABLE_HIDDEN) { - n_tables = i; - break; - } - } - features.datapath_id = ofproto->datapath_id; features.n_buffers = pktbuf_capacity(); - features.n_tables = n_tables; + features.n_tables = ofproto_get_n_visible_tables(ofproto); features.capabilities = (OFPUTIL_C_FLOW_STATS | OFPUTIL_C_TABLE_STATS | OFPUTIL_C_PORT_STATS | OFPUTIL_C_QUEUE_STATS); if (arp_match_ip) { @@ -3968,10 +3983,18 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn, table = &ofproto->tables[table_id]; - if (table->flags & OFTABLE_READONLY) { + if (!oftable_is_modifiable(table, fm->flags)) { return OFPERR_OFPBRC_EPERM; } + if (!(fm->flags & OFPUTIL_FF_HIDDEN_FIELDS)) { + if (!match_has_default_hidden_fields(&fm->match)) { + VLOG_WARN_RL(&rl, "%s: (add_flow) only internal flows can set " + "non-default values to hidden fields", ofproto->name); + return OFPERR_OFPBRC_EPERM; + } + } + cls_rule_init(&cr, &fm->match, fm->priority); /* Transform "add" into "modify" if there's an existing identical flow. */ @@ -3980,7 +4003,7 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn, fat_rwlock_unlock(&table->cls.rwlock); if (rule) { cls_rule_destroy(&cr); - if (!rule_is_modifiable(rule)) { + if (!rule_is_modifiable(rule, fm->flags)) { return OFPERR_OFPBRC_EPERM; } else if (rule->pending) { return OFPROTO_POSTPONE; @@ -4108,7 +4131,7 @@ modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn, /* FIXME: Implement OFPFUTIL_FF_RESET_COUNTS */ - if (rule_is_modifiable(rule)) { + if (rule_is_modifiable(rule, fm->flags)) { /* At least one rule is modifiable, don't report EPERM error. */ error = 0; } else { diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 3f3557cf6..ab51365c8 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -382,6 +382,7 @@ struct ofproto_table_settings { }; int ofproto_get_n_tables(const struct ofproto *); +uint8_t ofproto_get_n_visible_tables(const struct ofproto *); void ofproto_configure_table(struct ofproto *, int table_id, const struct ofproto_table_settings *); diff --git a/tests/classifier.at b/tests/classifier.at index 45146bacf..b6c9352c1 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -40,22 +40,22 @@ table=0 in_port=3 priority=0,ip,action=drop AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no Datapath actions: 1 ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.2,nw_frag=no Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80 Datapath actions: drop ]) AT_CHECK([ovs-appctl 
ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79 Datapath actions: 2 ]) OVS_VSWITCHD_STOP @@ -87,22 +87,22 @@ table=0 in_port=3 priority=0,ip,action=drop AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=192.168.0.0/16,nw_frag=no Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=2,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=2,nw_dst=192.168.0.0/16,nw_frag=no Datapath actions: 1 ]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=80 Datapath actions: drop ]) AT_CHECK([ovs-appctl ofproto/trace br0 
'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79 + [Megaflow: recirc_id=0,skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79 Datapath actions: 3 ]) OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) diff --git a/tests/lacp.at b/tests/lacp.at index d44bee0e8..0db207738 100644 --- a/tests/lacp.at +++ b/tests/lacp.at @@ -1,5 +1,10 @@ AT_BANNER([lacp]) +# Strips out Reciulation ID information since it may change over time. +m4_define([STRIP_RECIRC_ID], [[sed ' + s/Recirc-ID.*$// +' ]]) + AT_SETUP([lacp - config]) OVS_VSWITCHD_START([\ add-port br0 p1 --\ @@ -113,6 +118,7 @@ slave: p2: expired attached AT_CHECK([ovs-appctl bond/show], [0], [dnl ---- bond ---- bond_mode: active-backup +bond may use recirculation: no, Recirc-ID : -1 bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -182,8 +188,8 @@ done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 -ovs-appctl bond/show bond1], [0], [stdout]) +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [stdout]) AT_CHECK([sed '/active slave/d' stdout], [0], [dnl ---- bond0 ---- status: active negotiated @@ -275,6 +281,7 @@ slave: p3: current attached partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -288,6 +295,7 @@ slave p1: enabled ---- bond1 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -316,8 +324,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show 
bond1 -ovs-appctl bond/show bond0 -ovs-appctl bond/show bond1], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -408,6 +416,7 @@ slave: p3: current attached partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -422,6 +431,7 @@ slave p1: enabled ---- bond1 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -442,8 +452,8 @@ for i in `seq 0 40`; do ovs-appctl time/warp 100; done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 -ovs-appctl bond/show bond1], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID ], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -534,6 +544,7 @@ slave: p3: current attached partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms @@ -548,6 +559,7 @@ slave p1: enabled ---- bond1 ---- bond_mode: balance-tcp +bond may use recirculation: yes, bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 441cdf2db..7ec3d8e39 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -47,6 +47,148 @@ skb_priority(0),in_port(8),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_ OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif, active-backup bonding]) +# Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2 +# and br1 with interfaces p3, p4 and p8. +# toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. 
+OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=secure -- \ + add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \ + add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \ + add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --]) + +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) +AT_CHECK([ovs-ofctl add-flow br1 action=normal]) +ovs-appctl netdev-dummy/set-admin-state up +ovs-appctl time/warp 100 +ovs-appctl netdev-dummy/set-admin-state p2 down +ovs-appctl time/stop +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl time/warp 100 +ovs-appctl netdev-dummy/set-admin-state p2 up +ovs-appctl netdev-dummy/set-admin-state p1 down +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p7 
'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl time/warp 100 +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_XOUT], [0], [dnl +skb_priority(0),in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: +skb_priority(0),in_port(3),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3/0.0.0.0,dst=10.0.0.4/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: +skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: +skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:never, actions: +skb_priority(0),in_port(4),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: +skb_priority(0),in_port(4),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto-dpif, balance-slb bonding]) +# Create br0 with interfaces bond0(p1, p2, p3) and p7, +# and br1 with interfaces p4, p5, p6 and p8. +# p1 <-> p4, p2 <-> p5, p3 <-> p6 +# Send some traffic, make sure the traffic are spread based on source mac. 
+OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-slb --\ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \ + add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=secure -- \ + add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \ + add-port br1 p5 -- set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \ + add-port br1 p6 -- set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \ + add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --]) + +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) +AT_CHECK([ovs-ofctl add-flow br1 action=normal]) +AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK +]) +ovs-appctl netdev-dummy/set-admin-state up +ovs-appctl time/stop +ovs-appctl time/warp 100 +( +for i in `seq 0 100 |xargs printf '%02x\n'`; + do + pkt="in_port(7),eth(src=50:54:00:00:00:$i,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)" + AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt]) + done +) +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl dpif/dump-flows br1 > br1_flows.txt]) +# Make sure there is reasonable distribution to all three ports. +# We don't want to make this check precise, in case hash function changes.
+AT_CHECK([test `egrep 'in_port\(4\)' br1_flows.txt |wc -l` -gt 3]) +AT_CHECK([test `egrep 'in_port\(5\)' br1_flows.txt |wc -l` -gt 3]) +AT_CHECK([test `egrep 'in_port\(6\)' br1_flows.txt |wc -l` -gt 3]) +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto-dpif, balance-tcp bonding]) +# Create br0 with interfaces bond0(p1, p2, p3) and p7, +# and br1 with interfaces bond1(p4, p5, p6) and p8. +# bond0 <-> bond1 +# Send some traffic and make sure it is spread based on L4 headers. +OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 p3 bond_mode=balance-tcp lacp=active \ + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \ + add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=secure -- \ + add-bond br1 bond1 p4 p5 p6 bond_mode=balance-tcp lacp=active \ + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 --\ + set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=4 -- \ + set interface p5 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=5 -- \ + set interface p6 type=dummy options:stream=unix:$OVS_RUNDIR/p3.sock ofport_request=6 -- \ + add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --]) +AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK +]) +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) +AT_CHECK([ovs-ofctl add-flow br1 action=normal]) +AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled +], []) +sleep 1; +ovs-appctl time/stop +ovs-appctl time/warp 100 +ovs-appctl lacp/show > lacp.txt 
+ovs-appctl bond/show > bond.txt +( +for i in `seq 10 100` ; + do + pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(0x010)" + AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt]) + done +) +ovs-appctl time/warp 100 +ovs-appctl time/warp 100 +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt]) +AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt]) +# Make sure there is a reasonable distribution across all three ports. +# We don't want to make this check precise, in case the hash function changes. +AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 7]) +AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 7]) +AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 7]) +OVS_VSWITCHD_STOP() +AT_CLEANUP + AT_SETUP([ofproto-dpif - resubmit]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [10], [11], [12], [13], [14], [15], @@ -3909,7 +4051,7 @@ ovs-appctl time/stop ovs-appctl time/warp 5000 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) -sleep 1 + AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_XOUT], [0], [dnl skb_priority(0),in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/255.255.255.255,dst=10.0.0.1/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: 
skb_priority(0),in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4/255.255.255.255,dst=10.0.0.3/255.255.255.255,proto=1/0xff,tos=0/0,ttl=64/0,frag=no/0xff),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: