diff --git a/src/hooks/dhcp/high_availability/command_creator.cc b/src/hooks/dhcp/high_availability/command_creator.cc index fc5488d628..d2c87d1d00 100644 --- a/src/hooks/dhcp/high_availability/command_creator.cc +++ b/src/hooks/dhcp/high_availability/command_creator.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -175,6 +175,13 @@ CommandCreator::createLease6GetPage(const Lease6Ptr& last_lease6, return (command); } +ConstElementPtr +CommandCreator::createMaintenanceNotify(const HAServerType& server_type) { + auto command = config::createCommand("ha-maintenance-notify"); + insertService(command, server_type); + return (command); +} + void CommandCreator::insertLeaseExpireTime(ElementPtr& lease) { if ((lease->getType() != Element::map) || diff --git a/src/hooks/dhcp/high_availability/command_creator.h b/src/hooks/dhcp/high_availability/command_creator.h index 769ce346ba..897776a7bb 100644 --- a/src/hooks/dhcp/high_availability/command_creator.h +++ b/src/hooks/dhcp/high_availability/command_creator.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -132,6 +132,14 @@ public: createLease6GetPage(const dhcp::Lease6Ptr& lease6, const uint32_t limit); + /// @brief Creates ha-maintenance-notify command. + /// + /// @param server_type type of the DHCP server (DHCPv4 or DHCPv6) whose + /// service the command is addressed to. + /// + /// @return Pointer to the JSON representation of the command. + static data::ConstElementPtr + createMaintenanceNotify(const HAServerType& server_type); + private: /// @brief Replaces "cltt" with "expire" value within the lease. 
diff --git a/src/hooks/dhcp/high_availability/ha_callouts.cc b/src/hooks/dhcp/high_availability/ha_callouts.cc index 10f17c3813..582c8aa7f9 100644 --- a/src/hooks/dhcp/high_availability/ha_callouts.cc +++ b/src/hooks/dhcp/high_availability/ha_callouts.cc @@ -221,6 +221,18 @@ int maintenance_notify_command(CalloutHandle& handle) { return (0); } +/// @brief ha-maintenance-start command handler implementation. +int maintenance_start_command(CalloutHandle& handle) { + try { + impl->maintenanceStartHandler(handle); + + } catch (const std::exception& ex) { + LOG_ERROR(ha_logger, HA_MAINTENANCE_START_HANDLER_FAILED) + .arg(ex.what()); + } + + return (0); +} /// @brief This function is called when the library is loaded. /// @@ -257,6 +269,7 @@ int load(LibraryHandle& handle) { handle.registerCommandCallout("ha-scopes", scopes_command); handle.registerCommandCallout("ha-continue", continue_command); handle.registerCommandCallout("ha-maintenance-notify", maintenance_notify_command); + handle.registerCommandCallout("ha-maintenance-start", maintenance_start_command); } catch (const std::exception& ex) { LOG_ERROR(ha_logger, HA_CONFIGURATION_FAILED) diff --git a/src/hooks/dhcp/high_availability/ha_impl.cc b/src/hooks/dhcp/high_availability/ha_impl.cc index 94f47a0771..a07a7c8170 100644 --- a/src/hooks/dhcp/high_availability/ha_impl.cc +++ b/src/hooks/dhcp/high_availability/ha_impl.cc @@ -419,5 +419,11 @@ HAImpl::maintenanceNotifyHandler(hooks::CalloutHandle& callout_handle) { callout_handle.setArgument("response", response); } +void +HAImpl::maintenanceStartHandler(hooks::CalloutHandle& callout_handle) { + ConstElementPtr response = service_->processMaintenanceStart(); + callout_handle.setArgument("response", response); +} + } // end of namespace isc::ha } // end of namespace isc diff --git a/src/hooks/dhcp/high_availability/ha_impl.h b/src/hooks/dhcp/high_availability/ha_impl.h index 57dd21ed1c..9b6764f69c 100644 --- a/src/hooks/dhcp/high_availability/ha_impl.h +++ 
b/src/hooks/dhcp/high_availability/ha_impl.h @@ -149,6 +149,11 @@ public: /// @param callout_handle Callout handle provided to the callout. void maintenanceNotifyHandler(hooks::CalloutHandle& callout_handle); + /// @brief Implements handler for the ha-maintenance-start command. + /// + /// @param callout_handle Callout handle provided to the callout. + void maintenanceStartHandler(hooks::CalloutHandle& callout_handle); + protected: /// @brief Holds parsed configuration. diff --git a/src/hooks/dhcp/high_availability/ha_messages.cc b/src/hooks/dhcp/high_availability/ha_messages.cc index 987d5db4a9..46e62becbc 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.cc +++ b/src/hooks/dhcp/high_availability/ha_messages.cc @@ -1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Tue Jan 14 2020 12:44 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Tue Jan 14 2020 18:59 #include #include @@ -58,7 +58,13 @@ extern const isc::log::MessageID HA_LOAD_BALANCING_DUID_MISSING = "HA_LOAD_BALAN extern const isc::log::MessageID HA_LOAD_BALANCING_IDENTIFIER_MISSING = "HA_LOAD_BALANCING_IDENTIFIER_MISSING"; extern const isc::log::MessageID HA_LOCAL_DHCP_DISABLE = "HA_LOCAL_DHCP_DISABLE"; extern const isc::log::MessageID HA_LOCAL_DHCP_ENABLE = "HA_LOCAL_DHCP_ENABLE"; +extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED = "HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED"; +extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED = "HA_MAINTENANCE_NOTIFY_FAILED"; extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_HANDLER_FAILED = "HA_MAINTENANCE_NOTIFY_HANDLER_FAILED"; +extern const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE = "HA_MAINTENANCE_SHUTDOWN_SAFE"; +extern const isc::log::MessageID HA_MAINTENANCE_STARTED = "HA_MAINTENANCE_STARTED"; +extern const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN = "HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN"; +extern 
const isc::log::MessageID HA_MAINTENANCE_START_HANDLER_FAILED = "HA_MAINTENANCE_START_HANDLER_FAILED"; extern const isc::log::MessageID HA_MISSING_CONFIGURATION = "HA_MISSING_CONFIGURATION"; extern const isc::log::MessageID HA_SCOPES_HANDLER_FAILED = "HA_SCOPES_HANDLER_FAILED"; extern const isc::log::MessageID HA_SERVICE_STARTED = "HA_SERVICE_STARTED"; @@ -128,7 +134,13 @@ const char* values[] = { "HA_LOAD_BALANCING_IDENTIFIER_MISSING", "load balancing failed for the DHCPv4 message (transaction id: %1) because HW address and client identifier are missing", "HA_LOCAL_DHCP_DISABLE", "local DHCP service is disabled while the %1 is in the %2 state", "HA_LOCAL_DHCP_ENABLE", "local DHCP service is enabled while the %1 is in the %2 state", + "HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED", "failed to send ha-maintenance-notify to %1: %2", + "HA_MAINTENANCE_NOTIFY_FAILED", "error returned while processing ha-maintenance-notify by %1: %2", "HA_MAINTENANCE_NOTIFY_HANDLER_FAILED", "ha-maintenance-notify command failed: %1", + "HA_MAINTENANCE_SHUTDOWN_SAFE", "the server can now be shutdown for maintenance as the partner has taken over the DHCP traffic", + "HA_MAINTENANCE_STARTED", "the server is now in the partner-maintained mode and the partner is in the maintained mode", + "HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN", "the server is now in the partner-down mode as a result of requested maintenance", + "HA_MAINTENANCE_START_HANDLER_FAILED", "ha-maintenance-start command failed: %1", "HA_MISSING_CONFIGURATION", "high-availability parameter not specified for High Availability hooks library", "HA_SCOPES_HANDLER_FAILED", "ha-scopes command failed: %1", "HA_SERVICE_STARTED", "started high availability service in %1 mode as %2 server", diff --git a/src/hooks/dhcp/high_availability/ha_messages.h b/src/hooks/dhcp/high_availability/ha_messages.h index 7455044ee5..0157e8d5ba 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.h +++ b/src/hooks/dhcp/high_availability/ha_messages.h @@ 
-1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Tue Jan 14 2020 12:44 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Tue Jan 14 2020 18:59 #ifndef HA_MESSAGES_H #define HA_MESSAGES_H @@ -59,7 +59,13 @@ extern const isc::log::MessageID HA_LOAD_BALANCING_DUID_MISSING; extern const isc::log::MessageID HA_LOAD_BALANCING_IDENTIFIER_MISSING; extern const isc::log::MessageID HA_LOCAL_DHCP_DISABLE; extern const isc::log::MessageID HA_LOCAL_DHCP_ENABLE; +extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED; +extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED; extern const isc::log::MessageID HA_MAINTENANCE_NOTIFY_HANDLER_FAILED; +extern const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE; +extern const isc::log::MessageID HA_MAINTENANCE_STARTED; +extern const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN; +extern const isc::log::MessageID HA_MAINTENANCE_START_HANDLER_FAILED; extern const isc::log::MessageID HA_MISSING_CONFIGURATION; extern const isc::log::MessageID HA_SCOPES_HANDLER_FAILED; extern const isc::log::MessageID HA_SERVICE_STARTED; diff --git a/src/hooks/dhcp/high_availability/ha_messages.mes b/src/hooks/dhcp/high_availability/ha_messages.mes index 9247f7becf..5e832a2fcc 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.mes +++ b/src/hooks/dhcp/high_availability/ha_messages.mes @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2019 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2017-2020 Internet Systems Consortium, Inc. ("ISC") $NAMESPACE isc::ha @@ -302,11 +302,56 @@ is enabled because the server remains in a state in which it should respond to the DHCP clients. The first argument specifies server name. The second argument specifies server's state. 
+% HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED failed to send ha-maintenance-notify to %1: %2 +This error message indicates that there was a problem in communication with a +HA peer while sending the ha-maintenance-notify command. The first argument provides the +remote server's name. The second argument provides a reason for failure. + +% HA_MAINTENANCE_NOTIFY_FAILED error returned while processing ha-maintenance-notify by %1: %2 +This error message indicates that a peer returned an error status code +in response to a ha-maintenance-notify command. The first argument provides the +remote server's name. The second argument provides a reason for failure. + % HA_MAINTENANCE_NOTIFY_HANDLER_FAILED ha-maintenance-notify command failed: %1 This error message is issued to indicate that the ha-maintenance-notify command handler failed while processing the command. The argument provides the reason for failure. +% HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN the server is now in the partner-down mode as a result of requested maintenance +This informational message is displayed when the server receiving the +ha-maintenance-start command transitions to the partner-down state +because it was unable to communicate with the partner while receiving +the command. It is assumed that in such a situation the partner is +already offline for the maintenance. Note that in this case the +normal failover procedure does not take place. The server does not wait +for a heartbeat to fail several times, nor does it monitor the DHCP traffic +for unanswered queries. In the maintenance case the server transitions +to the partner-down state when it first encounters a communication +problem with the partner. + +% HA_MAINTENANCE_START_HANDLER_FAILED ha-maintenance-start command failed: %1 +This error message is issued to indicate that the ha-maintenance-start command +handler failed while processing the command. The argument provides the reason for +failure. 
+ +% HA_MAINTENANCE_STARTED the server is now in the partner-maintained mode and the partner is in the maintained mode +This informational message is displayed when the server receiving the +ha-maintenance-start command transitions to the partner-maintained +state. The server does it after sending the ha-maintenance-notify to +its partner to put the partner in the maintained state. From now on, +the server in the partner-maintained state will be responding to all +queries and the partner will respond to no queries. The partner may be +safely shut down for maintenance in which case this server will +automatically transition from the partner-maintained state to the +partner-down state. + +% HA_MAINTENANCE_SHUTDOWN_SAFE the server can now be shutdown for maintenance as the partner has taken over the DHCP traffic +This informational message is displayed after the server transitions to the +maintained state. This server no longer responds to any DHCP queries and its +partner, being in the partner-maintained state, has taken over the DHCP traffic. +When the server being in the maintained state is shut down, the partner +will move to the partner-down state immediately. + % HA_MISSING_CONFIGURATION high-availability parameter not specified for High Availability hooks library This error message is issued to indicate that the configuration for the High Availability hooks library hasn't been specified. 
The 'high-availability' diff --git a/src/hooks/dhcp/high_availability/ha_service.cc b/src/hooks/dhcp/high_availability/ha_service.cc index 5cd6277572..928432d5e1 100644 --- a/src/hooks/dhcp/high_availability/ha_service.cc +++ b/src/hooks/dhcp/high_availability/ha_service.cc @@ -42,6 +42,8 @@ const int HAService::HA_HEARTBEAT_COMPLETE_EVT; const int HAService::HA_LEASE_UPDATES_COMPLETE_EVT; const int HAService::HA_SYNCING_FAILED_EVT; const int HAService::HA_SYNCING_SUCCEEDED_EVT; +const int HAService::HA_MAINTENANCE_NOTIFY_EVT; +const int HAService::HA_MAINTENANCE_START_EVT; HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state, const HAConfigPtr& config, const HAServerType& server_type) @@ -72,6 +74,7 @@ HAService::defineEvents() { defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT"); defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT"); defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT"); + defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT"); } void @@ -83,6 +86,7 @@ HAService::verifyEvents() { getEvent(HA_SYNCING_FAILED_EVT); getEvent(HA_SYNCING_SUCCEEDED_EVT); getEvent(HA_MAINTENANCE_NOTIFY_EVT); + getEvent(HA_MAINTENANCE_START_EVT); } void @@ -216,6 +220,8 @@ HAService::maintainedStateHandler() { // Log if the state machine is paused. conditionalLogPausedState(); + + LOG_INFO(ha_logger, HA_MAINTENANCE_SHUTDOWN_SAFE); } scheduleHeartbeat(); @@ -232,11 +238,16 @@ HAService::partnerDownStateHandler() { // serving scopes appropriate for the new state. We don't do it if // we remain in this state. if (doOnEntry()) { + + bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT); + // It may be administratively disabled to handle partner's scope // in case of failure. If this is the case we'll just handle our // default scope (or no scope at all). The user will need to // manually enable this server to handle partner's scope. 
- if (config_->getThisServerConfig()->isAutoFailover()) { + // If we're in the maintenance mode we serve all scopes because + // it is not a failover situation. + if (maintenance || config_->getThisServerConfig()->isAutoFailover()) { query_filter_.serveFailoverScopes(); } else { query_filter_.serveDefaultScopes(); @@ -245,6 +256,12 @@ HAService::partnerDownStateHandler() { // Log if the state machine is paused. conditionalLogPausedState(); + + if (maintenance) { + // If we ended up in the partner-down state as a result of + // receiving the ha-maintenance-start command let's log it. + LOG_INFO(ha_logger, HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN); + } } scheduleHeartbeat(); @@ -289,19 +306,14 @@ HAService::partnerMaintainedStateHandler() { // serving scopes appropriate for the new state. We don't do it if // we remain in this state. if (doOnEntry()) { - // It may be administratively disabled to handle partner's scope - // in case of failure. If this is the case we'll just handle our - // default scope (or no scope at all). The user will need to - // manually enable this server to handle partner's scope. - if (config_->getThisServerConfig()->isAutoFailover()) { - query_filter_.serveFailoverScopes(); - } else { - query_filter_.serveDefaultScopes(); - } + query_filter_.serveFailoverScopes(); + adjustNetworkState(); // Log if the state machine is paused. 
conditionalLogPausedState(); + + LOG_INFO(ha_logger, HA_MAINTENANCE_STARTED); } scheduleHeartbeat(); @@ -1754,7 +1766,7 @@ HAService::processScopes(const std::vector<std::string>& scopes) { return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured.")); } -data::ConstElementPtr +ConstElementPtr HAService::processContinue() { if (unpause()) { return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues.")); @@ -1762,7 +1774,7 @@ HAService::processContinue() { return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused.")); } -data::ConstElementPtr +ConstElementPtr HAService::processMaintenanceNotify() { switch (getCurrState()) { case HA_BACKUP_ST: @@ -1778,6 +1790,132 @@ HAService::processMaintenanceNotify() { return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in maintained state.")); } +ConstElementPtr +HAService::processMaintenanceStart() { + switch (getCurrState()) { + case HA_BACKUP_ST: + case HA_MAINTAINED_ST: + case HA_PARTNER_MAINTAINED_ST: + case HA_TERMINATED_ST: + return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from" + " the " + stateToString(getCurrState()) + " to" + " partner-maintained state.")); + default: + ; + } + + HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig(); + + // Create HTTP/1.1 request including our command. + PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson> + (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(), + HostHttpHeader(remote_config->getUrl().getHostname())); + request->setBodyAsJson(CommandCreator::createMaintenanceNotify(server_type_)); + request->finalize(); + + // Response object should also be created because the HTTP client needs + // to know the type of the expected response. + HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>(); + + IOService io_service; + HttpClient client(io_service); + + boost::system::error_code captured_ec; + std::string captured_error_message; + + // Schedule asynchronous HTTP request. 
+ client.asyncSendRequest(remote_config->getUrl(), request, response, + [this, remote_config, &io_service, &captured_ec, &captured_error_message] + (const boost::system::error_code& ec, + const HttpResponsePtr& response, + const std::string& error_str) { + + io_service.stop(); + + // There are three possible groups of errors. One is the IO error + // causing issues in communication with the peer. Another one is + // an HTTP parsing error. The last type of error is when non-success + // error code is returned in the response carried in the HTTP message + // or if the JSON response is otherwise broken. + + std::string error_message; + + // Handle first two groups of errors. + if (ec || !error_str.empty()) { + error_message = (ec ? ec.message() : error_str); + LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED) + .arg(remote_config->getLogLabel()) + .arg(error_message); + + } else { + + // Handle third group of errors. + try { + static_cast<void>(verifyAsyncResponse(response)); + + } catch (const std::exception& ex) { + error_message = ex.what(); + LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_FAILED) + .arg(remote_config->getLogLabel()) + .arg(error_message); + } + } + + // If there was an error communicating with the partner, mark the + // partner as unavailable. + if (!error_message.empty()) { + communication_state_->setPartnerState("unavailable"); + } + + captured_ec = ec; + captured_error_message = error_message; + }, + HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST), + boost::bind(&HAService::clientConnectHandler, this, _1, _2), + boost::bind(&HAService::clientCloseHandler, this, _1) + ); + + // Run the IO service until it is stopped by any of the callbacks. This + // makes it synchronous. + io_service.run(); + + // If there was a communication problem with the partner we assume that + // the partner is already down while we receive this command. 
+ if (captured_ec) { + // Post the maintenance event before the transition and run the model + // with NOP_EVT, so that the partner-down state handler finds + // HA_MAINTENANCE_START_EVT as the last event on entry. + postNextEvent(HA_MAINTENANCE_START_EVT); + verboseTransition(HA_PARTNER_DOWN_ST); + runModel(NOP_EVT); + return (createAnswer(CONTROL_RESULT_SUCCESS, + "Server is now in the partner-down state as its" + " partner appears to be offline for maintenance.")); + + + } else if (captured_error_message.empty()) { + // If the partner responded indicating no error it means that the + // partner has been transitioned to the maintained state. In that + // case we transition to the partner-maintained state. + postNextEvent(HA_MAINTENANCE_START_EVT); + verboseTransition(HA_PARTNER_MAINTAINED_ST); + runModel(NOP_EVT); + + } else { + // Partner server returned an error so this server can't transition to + // the partner-maintained mode. + return (createAnswer(CONTROL_RESULT_ERROR, "Partner server responded with" + " the following error to the ha-maintenance-notify" + " commmand: " + captured_error_message + ".")); + + } + + return (createAnswer(CONTROL_RESULT_SUCCESS, + "Server is now in the partner-maintained state" + " and its partner is in the maintained state. The partner" + " can be now safely shut down.")); +} + ConstElementPtr HAService::verifyAsyncResponse(const HttpResponsePtr& response) { // The response must cast to JSON type. diff --git a/src/hooks/dhcp/high_availability/ha_service.h b/src/hooks/dhcp/high_availability/ha_service.h index dd81d7df5d..490a3856ef 100644 --- a/src/hooks/dhcp/high_availability/ha_service.h +++ b/src/hooks/dhcp/high_availability/ha_service.h @@ -53,6 +53,9 @@ public: /// ha-maintenance-notify command received. static const int HA_MAINTENANCE_NOTIFY_EVT = SM_DERIVED_EVENT_MIN + 5; + /// ha-maintenance-start command received. + static const int HA_MAINTENANCE_START_EVT = SM_DERIVED_EVENT_MIN + 6; + protected: /// @brief Callback invoked when request was sent and a response received @@ -780,6 +783,21 @@ public: /// @return Pointer to the reponse to the ha-maintenance-notify. 
data::ConstElementPtr processMaintenanceNotify(); + /// @brief Processes ha-maintenance-start command and returns a response. + /// + /// The server receiving this command will try to send the + /// ha-maintenance-notify command to the partner to instruct the partner + /// to transition to the maintained state. In this state the partner will + /// not respond to any DHCP queries. Next, this server will transition to + /// the partner-maintained state and therefore will start responding + /// to all DHCP queries. If the partner responds to the ha-maintenance-notify + /// with an error, this server won't transition to the partner-maintained + /// state and will signal an error to the caller. If the partner is unavailable, + /// this server will directly transition to the partner-down state. + /// + /// @return Pointer to the response to the ha-maintenance-start. + data::ConstElementPtr processMaintenanceStart(); + protected: /// @brief Checks if the response is valid or contains an error. diff --git a/src/hooks/dhcp/high_availability/tests/command_creator_unittest.cc b/src/hooks/dhcp/high_availability/tests/command_creator_unittest.cc index eb2972ac12..62bcc51285 100644 --- a/src/hooks/dhcp/high_availability/tests/command_creator_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/command_creator_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -392,4 +392,18 @@ TEST(CommandCreatorTest, createLease6GetPageZeroLimit) { EXPECT_THROW(CommandCreator::createLease6GetPage(lease6, 0), BadValue); } +// This test verifies that the ha-maintenance-notify command is correct +// while being sent to the DHCPv4 server. 
+TEST(CommandCreatorTest, createMaintenanceNotify4) { + ConstElementPtr command = CommandCreator::createMaintenanceNotify(HAServerType::DHCPv4); + ASSERT_NO_FATAL_FAILURE(testCommandBasics(command, "ha-maintenance-notify", "dhcp4")); +} + +// This test verifies that the ha-maintenance-notify command is correct +// while being sent to the DHCPv6 server. +TEST(CommandCreatorTest, createMaintenanceNotify6) { + ConstElementPtr command = CommandCreator::createMaintenanceNotify(HAServerType::DHCPv6); + ASSERT_NO_FATAL_FAILURE(testCommandBasics(command, "ha-maintenance-notify", "dhcp6")); +} + } diff --git a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc index 2f67043c16..6af66b536d 100644 --- a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc @@ -2627,6 +2627,135 @@ TEST_F(HAServiceTest, processMaintenanceNotify) { " partner-maintained to maintained state."); } +// This test verifies the case when the server receiving the ha-maintenance-start +// command successfully transitions to the partner-maintained state and its +// partner transitions to the maintained state. +TEST_F(HAServiceTest, processMaintenanceStartSuccess) { + // Create HA configuration for 3 servers. This server is + // server 1. + HAConfigPtr config_storage = createValidConfiguration(); + + // Start the servers. + ASSERT_NO_THROW({ + listener_->start(); + listener2_->start(); + }); + + HAService service(io_service_, network_state_, config_storage); + + // The tested function is synchronous, so we need to run server side IO service + // in background to not block the main thread. + auto thread = runIOServiceInThread(); + + // Process ha-maintenance-start command. + ConstElementPtr rsp; + ASSERT_NO_THROW(rsp = service.processMaintenanceStart()); + + // Stop the IO service. This should cause the thread to terminate. 
+ io_service_->stop(); + thread->join(); + io_service_->get_io_service().reset(); + io_service_->poll(); + + // The partner of our server is online and should have responded with + // the success status. Therefore, this server should have transitioned + // to the partner-maintained state. + ASSERT_TRUE(rsp); + checkAnswer(rsp, CONTROL_RESULT_SUCCESS, "Server is now in the partner-maintained state" + " and its partner is in the maintained state. The partner can be now safely" + " shut down."); + + EXPECT_EQ(HA_PARTNER_MAINTAINED_ST, service.getCurrState()); +} + +// This test verifies the case that the server transitions to the partner-down +// state after receiving the ha-maintenance-start command. This is the case +// when the communication with the partner server fails while this command +// is received. It is assumed that the partner server is already terminated +// for maintenance. +TEST_F(HAServiceTest, processMaintenanceStartPartnerDown) { + // Create HA configuration for 3 servers. This server is + // server 1. + HAConfigPtr config_storage = createValidConfiguration(); + + // Start the server, but don't start the partner. This simulates + // the case that the partner is already down for maintenance. + ASSERT_NO_THROW({ + listener_->start(); + }); + + HAService service(io_service_, network_state_, config_storage); + + // The tested function is synchronous, so we need to run server side IO service + // in background to not block the main thread. + auto thread = runIOServiceInThread(); + + // Process ha-maintenance-start command. + ConstElementPtr rsp; + ASSERT_NO_THROW(rsp = service.processMaintenanceStart()); + + // Stop the IO service. This should cause the thread to terminate. + io_service_->stop(); + thread->join(); + io_service_->get_io_service().reset(); + io_service_->poll(); + + // The partner of our server is offline, so the communication with it + // should have failed. 
Therefore, this server should have transitioned + // to the partner-down state. + ASSERT_TRUE(rsp); + checkAnswer(rsp, CONTROL_RESULT_SUCCESS, + "Server is now in the partner-down state as its" + " partner appears to be offline for maintenance."); + + EXPECT_EQ(HA_PARTNER_DOWN_ST, service.getCurrState()); +} + +// This test verifies the case when the server is receiving +// ha-maintenance-start command and tries to notify the partner +// which returns an error. +TEST_F(HAServiceTest, processMaintenanceStartPartnerError) { + // Create HA configuration for 3 servers. This server is + // server 1. + HAConfigPtr config_storage = createValidConfiguration(); + + // Simulate an error returned by the partner. + factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_ERROR); + + // Start the servers. + ASSERT_NO_THROW({ + listener_->start(); + listener2_->start(); + }); + + HAService service(io_service_, network_state_, config_storage); + + // The tested function is synchronous, so we need to run server side IO service + // in background to not block the main thread. + auto thread = runIOServiceInThread(); + + // Process ha-maintenance-start command. + ConstElementPtr rsp; + ASSERT_NO_THROW(rsp = service.processMaintenanceStart()); + + // Stop the IO service. This should cause the thread to terminate. + io_service_->stop(); + thread->join(); + io_service_->get_io_service().reset(); + io_service_->poll(); + + // The partner of our server is online but should have responded with + // an error status. Therefore, this server should have returned an error + // and should not have changed its state. + ASSERT_TRUE(rsp); + checkAnswer(rsp, CONTROL_RESULT_ERROR, "Partner server responded with" + " the following error to the ha-maintenance-notify commmand:" + " response returned, error code 1."); + + // The state shouldn't change. + EXPECT_EQ(HA_WAITING_ST, service.getCurrState()); +} + /// @brief HA partner to the server under test. 
/// /// This is a wrapper class around @c HttpListener which simulates a @@ -3024,8 +3153,9 @@ public: /// state. /// @param dhcp_enabled Indicates whether DHCP service is expected to be enabled /// or disabled in the given state. + /// @param event Event to be passed to the tested handler. void expectScopes(const MyState& my_state, const std::vector& scopes, - const bool dhcp_enabled) { + const bool dhcp_enabled, const int event = TestHAService::NOP_EVT) { // If expecting no scopes, let's enable some scope to make sure that the // code changes this setting. @@ -3048,6 +3178,7 @@ public: } // Transition to the desired state. + service_->postNextEvent(event); service_->verboseTransition(my_state.state_); // Run the handler. service_->runModel(TestHAService::NOP_EVT); @@ -4254,10 +4385,15 @@ TEST_F(HAServiceStateMachineTest, scopesServingLoadBalancingNoFailover) { expectScopes(MyState(HA_LOAD_BALANCING_ST), { "server1" }, true); expectScopes(MyState(HA_TERMINATED_ST), { "server1" }, true); - // PARTNER MAINTAINED & PARTNER DOWN: still serving my own scope - // because auto-failover is disabled. + // PARTNER DOWN: still serving my own scope because auto-failover is disabled. expectScopes(MyState(HA_PARTNER_DOWN_ST), { "server1" }, true); - expectScopes(MyState(HA_PARTNER_MAINTAINED_ST), { "server1" }, true); + + // PARTNER MAINTAINED: always serving all scopes. + expectScopes(MyState(HA_PARTNER_MAINTAINED_ST), { "server1", "server2" }, true); + + // Same for the partner-down case during maintenance. + expectScopes(MyState(HA_PARTNER_DOWN_ST), { "server1", "server2" }, true, + HAService::HA_MAINTENANCE_START_EVT); // MAINTAINED, READY & WAITING: serving no scopes. expectScopes(MyState(HA_MAINTAINED_ST), { }, false); @@ -4949,10 +5085,15 @@ TEST_F(HAServiceStateMachineTest, scopesServingHotStandbyStandbyNoFailover) { // TERMINATED: serving no scopes because the primary is active. 
expectScopes(MyState(HA_TERMINATED_ST), { }, true); - // PARTNER MAINTAINED & PARTNER DOWN: still serving no scopes because auto-failover is - // set to false. + // PARTNER DOWN: still serving no scopes because auto-failover is set to false. expectScopes(MyState(HA_PARTNER_DOWN_ST), { }, true); - expectScopes(MyState(HA_PARTNER_MAINTAINED_ST), { }, true); + + // PARTNER MAINTAINED: serving partner's scopes. + expectScopes(MyState(HA_PARTNER_MAINTAINED_ST), { "server1" }, true); + + // Same for the partner-down case during maintenance. + expectScopes(MyState(HA_PARTNER_DOWN_ST), { "server1" }, true, + HAService::HA_MAINTENANCE_START_EVT); // MAINTAINED, READY & WAITING: serving no scopes. expectScopes(MyState(HA_MAINTAINED_ST), { }, false);