2
0
mirror of https://github.com/openvswitch/ovs synced 2025-09-03 15:55:19 +00:00

rconn: Introduce new invariant to fix assertion failure in corner case.

Until now, rconn_get_version() has only reported the OpenFlow version in
use when the rconn is actually connected.  This makes sense, but it has a
harsh consequence.  Consider code like this:

    if (rconn_is_connected(rconn) && rconn_get_version(rconn) >= 0) {
        for (int i = 0; i < 2; i++) {
            struct ofpbuf *b = ofputil_encode_echo_request(
                rconn_get_version(rconn));
            rconn_send(rconn, b, NULL);
        }
    }

Maybe not the smartest code in the world, and probably no one would write
this exact code in any case, but it doesn't look too risky or crazy.

But it is.  The second trip through the loop can assert-fail inside
ofputil_encode_echo_request() because rconn_get_version(rconn) returns -1
instead of a valid OpenFlow version.  That happens if the first call to
rconn_send() encounters an error while sending the message and therefore
destroys the underlying vconn and disconnects so that rconn_get_version()
doesn't have a vconn to query for its version.

In a case like this where all the code to send the messages is close by, we
could just check rconn_get_version() in each loop iteration.  We could even
go through the tree and convince ourselves that individual bits of code are
safe, or be conservative and check rconn_get_version() >= 0 in the iffy
cases.  But this seems to me like an ongoing source of risk and a way to
get things wrong in corner cases.

This commit takes a different approach.  It introduces a new invariant: if
an rconn has ever been connected, then it returns a valid OpenFlow version
from rconn_get_version().  In addition, if an rconn is currently connected,
then the OpenFlow version it returns is the correct one (that may be
obvious, but there were corner cases before where it returned -1 even
though rconn_is_connected() returned true).

With this commit, the code above would work OK.  If the first call to
rconn_send() encounters an error sending the message, then
rconn_get_version() in the second iteration will return the same value as
in the first iteration.  The message passed to rconn_send() will end up
being discarded, but that's much better than either an assertion failure or
having to carefully analyze a lot of our code to deal with one unusual
corner case.

Reported-by: Han Zhou <zhouhan@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Han Zhou <hzhou8@ebay.com>
This commit is contained in:
Ben Pfaff
2018-05-23 16:58:31 -07:00
parent 3ab8a26e77
commit 476d2551ab
3 changed files with 18 additions and 26 deletions

View File

@@ -305,7 +305,7 @@ lswitch_run(struct lswitch *sw)
rconn_run(sw->rconn);
if (sw->state == S_CONNECTING) {
if (rconn_get_version(sw->rconn) != -1) {
if (rconn_is_connected(sw->rconn)) {
lswitch_handshake(sw);
sw->state = S_FEATURES_REPLY;
}

View File

@@ -141,7 +141,8 @@ struct rconn {
struct vconn *monitors[MAXIMUM_MONITORS];
size_t n_monitors;
uint32_t allowed_versions;
uint32_t allowed_versions; /* Acceptable OpenFlow versions. */
int version; /* Current or most recent version. */
};
/* Counts packets and bytes queued into an rconn by a given source. */
@@ -182,8 +183,6 @@ static bool is_connected_state(enum state);
static bool is_admitted_msg(const struct ofpbuf *);
static bool rconn_logging_connection_attempts__(const struct rconn *rc)
OVS_REQUIRES(rc->mutex);
static int rconn_get_version__(const struct rconn *rconn)
OVS_REQUIRES(rconn->mutex);
/* The following prototypes duplicate those in rconn.h, but there we weren't
* able to add the OVS_EXCLUDED annotations because the definition of struct
@@ -284,7 +283,9 @@ rconn_create(int probe_interval, int max_backoff, uint8_t dscp,
rconn_set_dscp(rc, dscp);
rc->n_monitors = 0;
rc->allowed_versions = allowed_versions;
rc->version = -1;
return rc;
}
@@ -376,8 +377,7 @@ rconn_connect_unreliably(struct rconn *rc,
rconn_set_target__(rc, vconn_get_name(vconn), name);
rc->reliable = false;
rc->vconn = vconn;
rc->last_connected = time_now();
state_transition(rc, S_ACTIVE);
state_transition(rc, S_CONNECTING);
ovs_mutex_unlock(&rc->mutex);
}
@@ -514,6 +514,7 @@ run_CONNECTING(struct rconn *rc)
VLOG_INFO("%s: connected", rc->name);
rc->n_successful_connections++;
state_transition(rc, S_ACTIVE);
rc->version = vconn_get_version(rc->vconn);
rc->last_connected = rc->state_entered;
} else if (retval != EAGAIN) {
if (rconn_logging_connection_attempts__(rc)) {
@@ -575,14 +576,8 @@ run_ACTIVE(struct rconn *rc)
* can end up queuing a packet with vconn == NULL and then *boom*. */
state_transition(rc, S_IDLE);
/* Send an echo request if we can. (If version negotiation is not
* complete, that is, if we did not yet receive a "hello" message from
* the peer, we do not know the version to use, so we don't send
* anything.) */
int version = rconn_get_version__(rc);
if (version >= 0 && version <= 0xff) {
rconn_send__(rc, ofputil_encode_echo_request(version), NULL);
}
/* Send an echo request. */
rconn_send__(rc, ofputil_encode_echo_request(rc->version), NULL);
return;
}
@@ -944,23 +939,19 @@ rconn_failure_duration(const struct rconn *rconn)
return duration;
}
static int
rconn_get_version__(const struct rconn *rconn)
OVS_REQUIRES(rconn->mutex)
{
return rconn->vconn ? vconn_get_version(rconn->vconn) : -1;
}
/* Returns the OpenFlow version negotiated with the peer, or -1 if there is
* currently no connection or if version negotiation is not yet complete. */
/* Returns the OpenFlow version most recently negotiated with a peer, or -1 if
* no version has ever been negotiated.
*
* If 'rconn' is connected (that is, if 'rconn_is_connected(rconn)' would
* return true), then the return value is guaranteed to be the OpenFlow version
* in use for the connection. The converse is not true: when the return value
* is not -1, 'rconn' might be disconnected. */
int
rconn_get_version(const struct rconn *rconn)
OVS_EXCLUDED(rconn->mutex)
{
int version;
ovs_mutex_lock(&rconn->mutex);
version = rconn_get_version__(rconn);
int version = rconn->version;
ovs_mutex_unlock(&rconn->mutex);
return version;

View File

@@ -571,6 +571,7 @@ vconn_connect(struct vconn *vconn)
break;
case VCS_DISCONNECTED:
ovs_assert(vconn->error != 0);
return vconn->error;
default: