diff --git a/RELNOTES b/RELNOTES index a4c36129..93acdfd6 100644 --- a/RELNOTES +++ b/RELNOTES @@ -137,6 +137,15 @@ by Eric Young (eay@cryptsoft.com). the patch upon which the fix is based. [ISC-Bugs #32222] +- In the failover code, handle the case of communications being interrupted + when the servers are dealing with POTENTIAL-CONFLICT. This patch allows + the primary to accept the secondary moving from POTENTIAL-CONFLICT to + RESOLUTION-INTERRUPTED as well as handling the bind update process better. + In addition the code to resend update or update all requests has been + modified to send requests more often. + [ISC-Bugs #36810] + [ISC-Bugs #20352] + Changes since 4.3.1b1 - Modify the linux and openwrt dhclient scripts to process information diff --git a/includes/dhcpd.h b/includes/dhcpd.h index 2043708f..9ad641be 100644 --- a/includes/dhcpd.h +++ b/includes/dhcpd.h @@ -3419,6 +3419,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *, isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *, const char *); isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state); +void dhcp_failover_rescind_updates (dhcp_failover_state_t *); isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *, enum failover_state); isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *, diff --git a/server/failover.c b/server/failover.c index eb2612b7..e6d9fdd6 100644 --- a/server/failover.c +++ b/server/failover.c @@ -1519,8 +1519,16 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state, /* In these situations, we remain in the current * state, or if in startup enter those states. */ - case communications_interrupted: case conflict_done: + /* As the peer may not have received or may have + * lost track of updates we sent previously we + * rescind them, causing us to retransmit them + * on an update request. + */ + dhcp_failover_rescind_updates(state); + /* fall through */ + + case communications_interrupted: case partner_down: case paused: case recover: @@ -1703,6 +1711,52 @@ isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state) return ISC_R_SUCCESS; } +/*! + * \brief Return any leases on the ack queue back to the update queue + * + * Re-schedule any pending updates by moving them from the ack queue + * (update sent awaiting response) back to the update queue (need to + * send an update for this lease). This will result in a retransmission + * of the update. + * + * \param state is the state block for the failover connection we are + * updating. + */ + +void dhcp_failover_rescind_updates (dhcp_failover_state_t *state) +{ + struct lease *lp; + + if (state->ack_queue_tail == NULL) + return; + + /* Zap the flags. */ + for (lp = state->ack_queue_head; lp; lp = lp->next_pending) + lp->flags = ((lp->flags & ~ON_ACK_QUEUE) | ON_UPDATE_QUEUE); + + /* Now hook the ack queue to the beginning of the update queue. */ + if (state->update_queue_head) { + lease_reference(&state->ack_queue_tail->next_pending, + state->update_queue_head, MDL); + lease_dereference(&state->update_queue_head, MDL); + } + lease_reference(&state->update_queue_head, state->ack_queue_head, MDL); + + if (!state->update_queue_tail) { +#if defined (POINTER_DEBUG) + if (state->ack_queue_tail->next_pending) { + log_error("next pending on ack queue tail."); + abort(); + } +#endif + lease_reference(&state->update_queue_tail, + state->ack_queue_tail, MDL); + } + lease_dereference(&state->ack_queue_tail, MDL); + lease_dereference(&state->ack_queue_head, MDL); + state->cur_unacked_updates = 0; +} + isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state, enum failover_state new_state) { @@ -1721,37 +1775,9 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state, case normal: case potential_conflict: case partner_down: - if (state -> ack_queue_tail) { - struct lease *lp; - - /* Zap the flags. */ - for (lp = state -> ack_queue_head; lp; lp = lp -> next_pending) - lp -> flags = ((lp -> flags & ~ON_ACK_QUEUE) | - ON_UPDATE_QUEUE); + /* Move the ack queue to the update queue */ + dhcp_failover_rescind_updates(state); - /* Now hook the ack queue to the beginning of the update - queue. */ - if (state -> update_queue_head) { - lease_reference (&state -> ack_queue_tail -> next_pending, - state -> update_queue_head, MDL); - lease_dereference (&state -> update_queue_head, MDL); - } - lease_reference (&state -> update_queue_head, - state -> ack_queue_head, MDL); - if (!state -> update_queue_tail) { -#if defined (POINTER_DEBUG) - if (state -> ack_queue_tail -> next_pending) { - log_error ("next pending on ack queue tail."); - abort (); - } -#endif - lease_reference (&state -> update_queue_tail, - state -> ack_queue_tail, MDL); - } - lease_dereference (&state -> ack_queue_tail, MDL); - lease_dereference (&state -> ack_queue_head, MDL); - state -> cur_unacked_updates = 0; - } /* We will re-queue a timeout later, if applicable. */ cancel_timeout (dhcp_failover_keepalive, state); break; @@ -1859,7 +1885,9 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state, break; case potential_conflict: - if (state -> i_am == primary) + if ((state->i_am == primary) || + ((state->i_am == secondary) && + (state->partner.state == conflict_done))) dhcp_failover_send_update_request (state); break; @@ -1960,7 +1988,18 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state, if (state -> partner.state == new_state && state -> me.state) { switch (state -> me.state) { case startup: - dhcp_failover_set_state (state, state -> saved_state); + /* + * If we have a peer state we must be connected. + * If so we should move to potential_conflict + * instead of resolution_interrupted, otherwise + * back to whereever we were before we stopped. + */ + if (state->saved_state == resolution_interrupted) + dhcp_failover_set_state(state, + potential_conflict); + else + dhcp_failover_set_state(state, + state->saved_state); return ISC_R_SUCCESS; case unknown_state: @@ -2183,6 +2222,17 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state, dhcp_failover_set_state(state, new_state); break; + case potential_conflict: + case resolution_interrupted: + /* + * This can happen when the connection is lost and + * recovered after the primary has moved to + * conflict-done but the secondary is still in + * potential-conflict. In that case, we have to + * remain in conflict-done. + */ + break; + default: log_fatal("Peer %s: Invalid attempt to move from %s " "to %s while local state is conflict-done.", @@ -4861,35 +4911,42 @@ isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state) # define FMA (char *)0, (unsigned *)0, 0 #endif - if (!state -> link_to_peer || - state -> link_to_peer -> type != dhcp_type_failover_link) - return DHCP_R_INVALIDARG; - link = (dhcp_failover_link_t *)state -> link_to_peer; + if (!state->link_to_peer || + state->link_to_peer->type != dhcp_type_failover_link) + return (DHCP_R_INVALIDARG); + link = (dhcp_failover_link_t *)state->link_to_peer; - if (!link -> outer || link -> outer -> type != omapi_type_connection) - return DHCP_R_INVALIDARG; + if (!link->outer || link->outer->type != omapi_type_connection) + return (DHCP_R_INVALIDARG); - if (state -> curUPD) - return ISC_R_ALREADYRUNNING; + /* We allow an update to be restarted in case we requested an update + * and were interrupted by something. If we had an ALL going we need + * to restart that. Otherwise we simply continue with the request */ + if (state->curUPD == FTM_UPDREQALL) { + return (dhcp_failover_send_update_request_all(state)); + } - status = (dhcp_failover_put_message - (link, link -> outer, - FTM_UPDREQ, link->xid++, - (failover_option_t *)0)); + status = (dhcp_failover_put_message(link, link->outer, FTM_UPDREQ, + link->xid++, NULL)); - if (status == ISC_R_SUCCESS) - state -> curUPD = FTM_UPDREQ; + state->curUPD = FTM_UPDREQ; #if defined (DEBUG_FAILOVER_MESSAGES) if (status != ISC_R_SUCCESS) - failover_print (FMA, " (failed)"); - failover_print (FMA, ")"); + failover_print(FMA, " (failed)"); + failover_print(FMA, ")"); if (obufix) { - log_debug ("%s", obuf); + log_debug("%s", obuf); } #endif - log_info ("Sent update request message to %s", state -> name); - return status; + + if (status == ISC_R_SUCCESS) { + log_info("Sent update request message to %s", state->name); + } else { + log_error("Failed to send update request all message to %s: %s", + state->name, isc_result_totext(status)); + } + return (status); } isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t @@ -4907,36 +4964,39 @@ isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t # define FMA (char *)0, (unsigned *)0, 0 #endif - if (!state -> link_to_peer || - state -> link_to_peer -> type != dhcp_type_failover_link) - return DHCP_R_INVALIDARG; - link = (dhcp_failover_link_t *)state -> link_to_peer; + if (!state->link_to_peer || + state->link_to_peer->type != dhcp_type_failover_link) + return (DHCP_R_INVALIDARG); + link = (dhcp_failover_link_t *)state->link_to_peer; - if (!link -> outer || link -> outer -> type != omapi_type_connection) - return DHCP_R_INVALIDARG; + if (!link->outer || link->outer->type != omapi_type_connection) + return (DHCP_R_INVALIDARG); - /* If there is an UPDREQ in progress, then upgrade to UPDREQALL. */ - if (state -> curUPD && (state -> curUPD != FTM_UPDREQ)) - return ISC_R_ALREADYRUNNING; + /* We allow an update to be restarted in case we requested an update + * and were interrupted by something. + */ - status = (dhcp_failover_put_message - (link, link -> outer, - FTM_UPDREQALL, link->xid++, - (failover_option_t *)0)); + status = (dhcp_failover_put_message(link, link->outer, FTM_UPDREQALL, + link->xid++, NULL)); - if (status == ISC_R_SUCCESS) - state -> curUPD = FTM_UPDREQALL; + state->curUPD = FTM_UPDREQALL; #if defined (DEBUG_FAILOVER_MESSAGES) if (status != ISC_R_SUCCESS) - failover_print (FMA, " (failed)"); - failover_print (FMA, ")"); + failover_print(FMA, " (failed)"); + failover_print(FMA, ")"); if (obufix) { - log_debug ("%s", obuf); + log_debug("%s", obuf); } #endif - log_info ("Sent update request all message to %s", state -> name); - return status; + + if (status == ISC_R_SUCCESS) { + log_info("Sent update request all message to %s", state->name); + } else { + log_error("Failed to send update request all message to %s: %s", + state->name, isc_result_totext(status)); + } + return (status); } isc_result_t dhcp_failover_send_update_done (dhcp_failover_state_t *state)