diff --git a/src/hooks/dhcp/high_availability/communication_state.cc b/src/hooks/dhcp/high_availability/communication_state.cc index 6af4d4e987..049c61a414 100644 --- a/src/hooks/dhcp/high_availability/communication_state.cc +++ b/src/hooks/dhcp/high_availability/communication_state.cc @@ -50,7 +50,8 @@ CommunicationState::CommunicationState(const IOServicePtr& io_service, poke_time_(boost::posix_time::microsec_clock::universal_time()), heartbeat_impl_(0), partner_state_(-1), partner_scopes_(), clock_skew_(0, 0, 0, 0), last_clock_skew_warn_(), - my_time_at_skew_(), partner_time_at_skew_() { + my_time_at_skew_(), partner_time_at_skew_(), + analyzed_messages_count_(0) { } CommunicationState::~CommunicationState() { @@ -158,6 +159,7 @@ CommunicationState::poke() { // partner, we need to clear any gathered information because the connection // seems to be (re)established. clearUnackedClients(); + analyzed_messages_count_ = 0; if (timer_) { // Check the duration since last poke. If it is less than a second, we don't @@ -188,6 +190,11 @@ CommunicationState::isCommunicationInterrupted() const { return (getDurationInMillisecs() > config_->getMaxResponseDelay()); } +size_t +CommunicationState::getAnalyzedMessagesCount() const { + return (analyzed_messages_count_); +} + bool CommunicationState::clockSkewShouldWarn() { // First check if the clock skew is beyond the threshold. @@ -275,6 +282,8 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr& message) isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv4 message"); } + ++analyzed_messages_count_; + // Check value of the "secs" field by comparing it with the configured // threshold. uint16_t secs = msg->getSecs(); @@ -349,6 +358,8 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr& message) isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv6 message"); } + ++analyzed_messages_count_; + // Check the value of the "elapsed time" option. 
If it is below the threshold // there is nothing to do. The "elapsed time" option holds the time in // 1/100 of second, hence we have to multiply by 10 to get a value in milliseconds. diff --git a/src/hooks/dhcp/high_availability/communication_state.h b/src/hooks/dhcp/high_availability/communication_state.h index e4effff475..1acf690669 100644 --- a/src/hooks/dhcp/high_availability/communication_state.h +++ b/src/hooks/dhcp/high_availability/communication_state.h @@ -198,6 +198,13 @@ public: /// this method. virtual void analyzeMessage(const boost::shared_ptr& message) = 0; + /// @brief Returns the number of analyzed messages while being in the + /// communications interrupted state. + /// + /// @return Number of analyzed messages. It includes retransmissions by + /// the same clients. + size_t getAnalyzedMessagesCount() const; + /// @brief Checks if the partner failure has been detected based /// on the DHCP traffic analysis. /// @@ -351,6 +358,9 @@ protected: /// @brief Partner reported time when skew was calculated. boost::posix_time::ptime partner_time_at_skew_; + + /// @brief Total number of analyzed messages to be responded to by the partner. + size_t analyzed_messages_count_; }; /// @brief Type of the pointer to the @c CommunicationState object. diff --git a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc index 23888d770f..04bae7df24 100644 --- a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc @@ -190,6 +190,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { // Initially, there should be no unacked clients recorded. ASSERT_FALSE(state_.failureDetected()); EXPECT_EQ(0, state_.getUnackedClientsCount()); + EXPECT_EQ(0, state_.getAnalyzedMessagesCount()); // The maximum number of unacked clients is 10.
Let's provide 10 // DHCPDISCOVER messages with the "secs" value of 15 which exceeds @@ -209,6 +210,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { << static_cast(i); } EXPECT_EQ(10, state_.getUnackedClientsCount()); + EXPECT_EQ(10, state_.getAnalyzedMessagesCount()); // Let's provide similar set of requests but this time the "secs" field is // below the threshold. They should not be counted as failures. Also, @@ -221,6 +223,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { << static_cast(i); } EXPECT_EQ(10, state_.getUnackedClientsCount()); + EXPECT_EQ(20, state_.getAnalyzedMessagesCount()); // Let's create a message from a new (not recorded yet) client with the // "secs" field value below the threshold. It should not be recorded. @@ -229,18 +232,21 @@ TEST_F(CommunicationStateTest, detectFailureV4) { // Still no failure. ASSERT_FALSE(state_.failureDetected()); EXPECT_EQ(10, state_.getUnackedClientsCount()); + EXPECT_EQ(21, state_.getAnalyzedMessagesCount()); // Let's repeat one of the requests which already have been recorded as // unacked but with a greater value of "secs" field. This should not // be counted because only new clients count. ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 3, 3, 20))); ASSERT_FALSE(state_.failureDetected()); + EXPECT_EQ(22, state_.getAnalyzedMessagesCount()); // This time let's simulate a client with a MAC address already recorded but // with a client identifier. This should be counted as a new unacked request. ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 7, 7, 15))); ASSERT_TRUE(state_.failureDetected()); EXPECT_EQ(11, state_.getUnackedClientsCount()); + EXPECT_EQ(23, state_.getAnalyzedMessagesCount()); // Poking should cause all counters to reset as it is an indication that the // control connection has been re-established. @@ -249,6 +255,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { // We're back to no failure state. 
EXPECT_FALSE(state_.failureDetected()); EXPECT_EQ(0, state_.getUnackedClientsCount()); + EXPECT_EQ(0, state_.getAnalyzedMessagesCount()); // Send 11 DHCPDISCOVER messages with the "secs" field bytes swapped. Swapping // bytes was reported for some misbehaving Windows clients. The server should @@ -264,6 +271,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { << " when testing swapped secs field bytes"; } EXPECT_EQ(0, state_.getUnackedClientsCount()); + EXPECT_EQ(11, state_.getAnalyzedMessagesCount()); // Repeat the same test, but this time either the first byte exceeds the // secs threshold or the second byte is non-zero. All should be counted @@ -283,6 +291,7 @@ TEST_F(CommunicationStateTest, detectFailureV4) { 0x30))); EXPECT_TRUE(state_.failureDetected()); EXPECT_EQ(11, state_.getUnackedClientsCount()); + EXPECT_EQ(22, state_.getAnalyzedMessagesCount()); } // This test verifies that it is possible to disable analysis of the DHCPv4 @@ -297,7 +306,8 @@ TEST_F(CommunicationStateTest, failureDetectionDisabled4) { TEST_F(CommunicationStateTest, detectFailureV6) { // Initially, there should be no unacked clients recorded. ASSERT_FALSE(state6_.failureDetected()); - EXPECT_EQ(0, state_.getUnackedClientsCount()); + EXPECT_EQ(0, state6_.getUnackedClientsCount()); + EXPECT_EQ(0, state6_.getAnalyzedMessagesCount()); // The maximum number of unacked clients is 10. Let's provide 10 // Solicit messages with the "elapsed time" value of 1500 which exceeds @@ -314,6 +324,7 @@ TEST_F(CommunicationStateTest, detectFailureV6) { << static_cast(i); } EXPECT_EQ(10, state6_.getUnackedClientsCount()); + EXPECT_EQ(10, state6_.getAnalyzedMessagesCount()); // Let's provide similar set of requests but this time the "elapsed time" is // below the threshold. They should not be counted as failures. 
Also, @@ -326,6 +337,7 @@ TEST_F(CommunicationStateTest, detectFailureV6) { << static_cast(i); } EXPECT_EQ(10, state6_.getUnackedClientsCount()); + EXPECT_EQ(20, state6_.getAnalyzedMessagesCount()); // Let's create a message from a new (not recorded yet) client with the // "elapsed time" value below the threshold. It should not be recorded. @@ -334,6 +346,7 @@ TEST_F(CommunicationStateTest, detectFailureV6) { // Still no failure. ASSERT_FALSE(state6_.failureDetected()); EXPECT_EQ(10, state6_.getUnackedClientsCount()); + EXPECT_EQ(21, state6_.getAnalyzedMessagesCount()); // Let's repeat one of the requests which already have been recorded as // unacked but with a greater value of "elapsed time". This should not @@ -341,11 +354,13 @@ TEST_F(CommunicationStateTest, detectFailureV6) { ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, 3, 2000))); ASSERT_FALSE(state6_.failureDetected()); EXPECT_EQ(10, state6_.getUnackedClientsCount()); + EXPECT_EQ(22, state6_.getAnalyzedMessagesCount()); // New unacked client should cause failure to the detected. ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, 11, 1500))); ASSERT_TRUE(state6_.failureDetected()); EXPECT_EQ(11, state6_.getUnackedClientsCount()); + EXPECT_EQ(23, state6_.getAnalyzedMessagesCount()); // Poking should cause all counters to reset as it is an indication that the // control connection has been re-established. @@ -354,6 +369,7 @@ TEST_F(CommunicationStateTest, detectFailureV6) { // We're back to no failure state. EXPECT_FALSE(state6_.failureDetected()); EXPECT_EQ(0, state6_.getUnackedClientsCount()); + EXPECT_EQ(0, state6_.getAnalyzedMessagesCount()); } // This test verifies that it is possible to disable analysis of the DHCPv6