2
0
mirror of https://gitlab.isc.org/isc-projects/kea synced 2025-08-30 21:45:37 +00:00

[#3513] add HA clock skew to status-get

This commit is contained in:
Andrei Pavel
2024-08-21 13:18:17 +03:00
parent b1169c0850
commit 5bbf2077be
5 changed files with 104 additions and 43 deletions

View File

@@ -2106,21 +2106,24 @@ the HA status of two ``load-balancing`` servers:
"local": {
"role": "primary",
"scopes": [ "server1" ],
"server-name": "server1",
"state": "load-balancing",
"server-name": "server1"
"system-time": "2024-01-01 12:00:00"
},
"remote": {
"age": 10,
"in-touch": true,
"role": "secondary",
"last-scopes": [ "server2" ],
"last-state": "load-balancing",
"analyzed-packets": 8,
"clock-skew": 0,
"communication-interrupted": true,
"connecting-clients": 2,
"in-touch": true,
"last-scopes": [ "server2" ],
"last-state": "load-balancing",
"role": "secondary",
"server-name": "server2",
"system-time": "2024-01-01 12:00:00",
"unacked-clients": 1,
"unacked-clients-left": 2,
"analyzed-packets": 8,
"server-name": "server2"
"unacked-clients-left": 2
}
}
}
@@ -2167,6 +2170,11 @@ server may start monitoring the DHCP traffic directed to the partner to see if
the partner is responding to this traffic. More about the failover procedure can
be found in :ref:`ha-load-balancing-config`.
The ``system-time`` parameters hold the UTC time in ``%Y-%m-%d %H:%M:%S`` format
for each active node, local and remote respectively. The ``clock-skew``
parameter is available in the ``remote`` map and holds the difference in seconds
between the two times; it is negative when the partner's clock is behind the
local clock and positive when it is ahead.
The ``connecting-clients``, ``unacked-clients``, ``unacked-clients-left``, and
``analyzed-packets`` parameters were introduced along with the
``communication-interrupted`` parameter and they convey useful information about

View File

@@ -535,12 +535,15 @@ CommunicationState::logFormatClockSkewInternal() const {
// Note HttpTime resolution is only to seconds, so we use fractional
// precision of zero when logging.
os << "my time: " << util::ptimeToText(my_time_at_skew_, 0)
<< ", partner's time: " << util::ptimeToText(partner_time_at_skew_, 0)
os << "my time: " << ptimeToText(my_time_at_skew_, 0)
<< ", partner's time: " << ptimeToText(partner_time_at_skew_, 0)
<< ", partner's clock is ";
// If negative clock skew, the partner's time is behind our time.
if (clock_skew_.is_negative()) {
if (clock_skew_.total_seconds() == 0) {
// Most common case.
os << "synchroninzed";
} else if (clock_skew_.is_negative()) {
// Partner's time is behind our time.
os << clock_skew_.invert_sign().total_seconds() << "s behind";
} else {
// Partner's time is ahead of ours.
@@ -584,6 +587,8 @@ CommunicationState::getReport() const {
}
report->set("unacked-clients-left", Element::create(unacked_clients_left));
report->set("analyzed-packets", Element::create(static_cast<long long>(getAnalyzedMessagesCount())));
report->set("system-time", Element::create(ptimeToText(getPartnerTimeAtSkew(), 0)));
report->set("clock-skew", Element::create(clock_skew_.total_seconds()));
return (report);
}
@@ -651,6 +656,24 @@ CommunicationState::setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_c
partner_unsent_update_count_.second = unsent_update_count;
}
// Returns the local node's time stored for the clock skew report. When no
// time has been stored yet, the current UTC time is returned instead.
boost::posix_time::ptime
CommunicationState::getMyTimeAtSkew() const {
    // A not-a-date-time value is treated as "not initialized yet"; in that
    // case fall back to the present moment.
    bool const recorded = !my_time_at_skew_.is_not_a_date_time();
    return (recorded ? my_time_at_skew_ :
            boost::posix_time::microsec_clock::universal_time());
}
// Returns the partner's time stored for the clock skew report. When no
// time has been stored yet, the current UTC time is returned instead.
boost::posix_time::ptime
CommunicationState::getPartnerTimeAtSkew() const {
    // A not-a-date-time value is treated as "not initialized yet"; in that
    // case fall back to the present moment.
    bool const recorded = !partner_time_at_skew_.is_not_a_date_time();
    return (recorded ? partner_time_at_skew_ :
            boost::posix_time::microsec_clock::universal_time());
}
CommunicationState4::CommunicationState4(const IOServicePtr& io_service,
const HAConfigPtr& config)
: CommunicationState(io_service, config), connecting_clients_(),

View File

@@ -698,6 +698,23 @@ private:
/// the partner.
void setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count);
public:
/// @brief Retrieves the time of the local node when skew was last calculated.
///
/// The value is used in reporting to the user, so the accessor is lenient
/// with corner cases: if the time has not been initialized yet, the current
/// UTC time is returned as an approximation.
///
/// @return my time at skew, or the current UTC time if it was not
/// initialized yet.
boost::posix_time::ptime getMyTimeAtSkew() const;
/// @brief Retrieves the time of the partner node when skew was last calculated.
///
/// The value is used in reporting to the user, so the accessor is lenient
/// with corner cases: if the time has not been initialized yet, the current
/// UTC time is returned as an approximation.
///
/// @return partner's time at skew, or the current UTC time if it was not
/// initialized yet.
boost::posix_time::ptime getPartnerTimeAtSkew() const;
protected:
/// @brief Pointer to the common IO service instance.
asiolink::IOServicePtr io_service_;

View File

@@ -22,6 +22,7 @@
#include <http/date_time.h>
#include <http/response_json.h>
#include <http/post_request_json.h>
#include <util/boost_time_utils.h>
#include <util/multi_threading_mgr.h>
#include <util/stopwatch.h>
#include <boost/pointer_cast.hpp>
@@ -1693,6 +1694,7 @@ HAService::processStatusGet() const {
}
local->set("scopes", list);
local->set("server-name", Element::create(config_->getThisServerName()));
local->set("system-time", Element::create(ptimeToText(communication_state_->getMyTimeAtSkew(), 0)));
ha_servers->set("local", local);
// Do not include remote server information if this is a backup server or
@@ -3283,7 +3285,7 @@ HAService::clientCloseHandler(int tcp_native_fd) {
if (tcp_native_fd >= 0) {
IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
}
};
}
size_t
HAService::pendingRequestSize() {

View File

@@ -19,36 +19,6 @@
"{",
" \"result\": <integer>,",
" \"arguments\": {",
" \"pid\": <integer>,",
" \"uptime\": <uptime in seconds>,",
" \"reload\": <time since reload in seconds>,",
" \"high-availability\": [",
" {",
" \"ha-mode\": <HA mode configured for this relationship>,",
" \"ha-servers\": {",
" \"local\": {",
" \"role\": <role of this server as in the configuration file>,",
" \"scopes\": <list of scope names served by this server>,",
" \"state\": <HA state name of the server receiving the command>",
" },",
" \"remote\": {",
" \"age\": <the age of the remote status in seconds>,",
" \"in-touch\": <indicates if this server communicated with remote>,",
" \"last-scopes\": <list of scopes served by partner>,",
" \"last-state\": <HA state name of the partner>,",
" \"role\": <partner role>",
" }",
" }",
" }",
" ],",
" \"multi-threading-enabled\": true,",
" \"thread-pool-size\": 4,",
" \"packet-queue-size\": 64,",
" \"packet-queue-statistics\": [ 1.2, 2.3, 3.4 ],",
" \"sockets\": {",
" \"errors\": [ <error received during the last attempt to open all sockets> ],",
" \"status\": <ready, retrying, or failed>",
" },",
" \"dhcp-state\": {",
" \"disabled-by-db-connection\": false,",
" \"disabled-by-local-command\": [],",
@@ -56,6 +26,47 @@
" \"disabled-by-user\": false,",
" \"globally-disabled\": false",
" },",
" \"extended-info-tables\": <whether relay information is held in lease tables>,",
" \"high-availability\": [",
" {",
" \"ha-mode\": <HA mode configured for this relationship>,",
" \"ha-servers\": {",
" \"local\": {",
" \"role\": <role of this server as in the configuration file>,",
" \"scopes\": <list of scope names served by this server>,",
" \"server-name\": <name of the local server>,",
" \"state\": <HA state name of the server receiving the command>,",
" \"system-time\": <system time in format '%Y-%m-%d %H:%M:%S' on UTC timezone>",
" },",
" \"remote\": {",
" \"age\": <the age of the remote status in seconds>,",
" \"analyzed-packets\": <number of packets sent to the partner server since communication was interrupted>,",
" \"clock-skew\": <difference in seconds between local and partner server times>,",
" \"communication-interrupted\": <whether communication did not happen for more than max-response-delay milliseconds>,",
" \"connecting-clients\": <number of different clients getting a lease from partner>,",
" \"in-touch\": <indicates if this server communicated with partner>,",
" \"last-scopes\": <list of scopes served by partner>,",
" \"last-state\": <HA state name of the partner>,",
" \"role\": <partner role>,",
" \"server-name\": <name of the partner server>,",
" \"system-time\": <system time in format '%Y-%m-%d %H:%M:%S' on UTC timezone>,",
" \"unacked-clients\": <number of unacked clients>,",
" \"unacked-clients-left\": <how many more clients have to be unacked before partner-down state>",
" }",
" }",
" }",
" ],",
" \"multi-threading-enabled\": true,",
" \"packet-queue-size\": 64,",
" \"packet-queue-statistics\": [ 1.2, 2.3, 3.4 ],",
" \"pid\": <integer>,",
" \"reload\": <time since reload in seconds>,",
" \"sockets\": {",
" \"errors\": <list of errors received during the last attempt to open all sockets; only appears when status is failed or retrying>,",
" \"status\": <ready, retrying, or failed>",
" },",
" \"thread-pool-size\": 4,",
" \"uptime\": <uptime in seconds>",
" }",
"}"
],