2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 06:25:31 +00:00

fix: usr: Fix resolver statistics counters for timed out responses

When a query timed out, the resolver could incorrectly increment the regular response counters even though no response had been received. This has been fixed.

Closes #5193

Merge branch '5193-resolver-statistics-counters-fix' into 'main'

See merge request isc-projects/bind9!10227
This commit is contained in:
Aram Sargsyan
2025-03-18 17:05:23 +00:00
3 changed files with 30 additions and 6 deletions

View File

@@ -43,6 +43,12 @@ grep "status: NOERROR" dig.out.ns1.test${n} >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
rndccmd 10.53.0.1 stats || ret=1 # Get the responses, RTT and timeout statistics before the following timeout tests
grep -F 'responses received' ns1/named.stats >ns1/named.stats.responses-before || true
grep -F 'queries with RTT' ns1/named.stats >ns1/named.stats.rtt-before || true
grep -F 'query timeouts' ns1/named.stats >ns1/named.stats.timeouts-before || true
mv ns1/named.stats ns1/named.stats-before
# 'resolver-query-timeout' is set to 5 seconds in ns1, so dig with a lower
# timeout value should give up earlier than that.
n=$((n + 1))
@@ -66,6 +72,20 @@ grep -F "EDE: 22 (No Reachable Authority)" dig.out.ns1.test${n} >/dev/null || re
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
# Dump the statistics again after the timeout tests and compare the same three
# counter lines against the "-before" snapshots taken earlier in this script.
echo_i "checking that the timeout didn't skew the resolver responses counters and did update the timeout counter ($n)"
ret=0
rndccmd 10.53.0.1 stats || ret=1
# '|| true' discards grep's exit status, so a counter line that is absent from
# the dump produces an empty snapshot file instead of a non-zero status.
grep -F 'responses received' ns1/named.stats >ns1/named.stats.responses-after || true
grep -F 'queries with RTT' ns1/named.stats >ns1/named.stats.rtt-after || true
grep -F 'query timeouts' ns1/named.stats >ns1/named.stats.timeouts-after || true
# NOTE(review): presumably the dump is renamed so that a later 'rndc stats'
# starts from a fresh ns1/named.stats -- confirm.
mv ns1/named.stats ns1/named.stats-after
# The response and RTT counter lines must be unchanged by the timed-out query:
# any difference reported by diff fails the check.
diff ns1/named.stats.responses-before ns1/named.stats.responses-after >/dev/null || ret=1
diff ns1/named.stats.rtt-before ns1/named.stats.rtt-after >/dev/null || ret=1
# The timeout counter lines must have changed: here diff succeeding (no
# difference) is the failure case, hence '&&' rather than '||'.
diff ns1/named.stats.timeouts-before ns1/named.stats.timeouts-after >/dev/null && ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# 'resolver-query-timeout' is set to 5 seconds in ns1, so named should
# interrupt the non-responsive query and send a SERVFAIL answer before dig's
# own timeout fires, which is set to 7 seconds. This time, exampleudp.net is

View File

@@ -21,6 +21,7 @@ pytestmark = pytest.mark.extra_artifacts(
"nextpart.out.*",
"ans*/ans.run",
"ans*/query.log",
"ns1/named.stats*",
"ns4/tld.db",
"ns5/trusted.conf",
"ns6/K*",

View File

@@ -7462,10 +7462,12 @@ resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) {
QTRACE("response");
if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET) {
inc_stats(fctx->res, dns_resstatscounter_responsev4);
} else {
inc_stats(fctx->res, dns_resstatscounter_responsev6);
if (eresult == ISC_R_SUCCESS) {
if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET) {
inc_stats(fctx->res, dns_resstatscounter_responsev4);
} else {
inc_stats(fctx->res, dns_resstatscounter_responsev6);
}
}
rctx = isc_mem_get(fctx->mctx, sizeof(*rctx));
@@ -8047,6 +8049,9 @@ rctx_timedout(respctx_t *rctx) {
fctx->timeout = true;
fctx->timeouts++;
rctx->no_response = true;
rctx->finish = NULL;
now = isc_time_now();
/* netmgr timeouts are accurate to the millisecond */
if (isc_time_microdiff(&fctx->expires, &now) < US_PER_MS) {
@@ -8057,8 +8062,6 @@ rctx_timedout(respctx_t *rctx) {
} else {
FCTXTRACE("query timed out; trying next server");
/* try next server */
rctx->no_response = true;
rctx->finish = NULL;
rctx->next_server = true;
}