2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-09-05 17:15:31 +00:00
Files
bind/bin/tests/system/statistics/tests.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

281 lines
11 KiB
Bash
Raw Normal View History

#!/bin/sh
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
2012-06-29 11:39:47 +10:00
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
2018-02-20 15:43:27 -08:00
# information regarding copyright ownership.
set -e
. ../conf.sh
Corrected several system test issues - serve-stale: dig wasn't always running in background when it should. some of the serve-stale test cases are based on groups of dig calls running simultaneously in the background: the test pauses and resumes running after 'wait'. in some cases the final call to dig in a group wasn't in the background, and this sometimes caused delays that affected later test results. in another case, a test was simplified and made more reliable by running dig in the foreground removing a sleep. - serve-stale: The extension of the dig timeout period from 10 to 11 seconds in commit 5307bf64ce80 was left undone in a few places and has now been completed. - serve-stale: Resolver-query-timeout was set incorrectly. a comment above a test case in serve-stale/tests.sh says: "We configured a long value of 30 seconds for resolver-query-timeout," but resolver-query-timeout was actually set to 10, not 30. this is now fixed. - rpz: Force retransfer of the fast-expire zone, to ensure it's fully loaded in ns3; previously it could have been left unloaded if ns5 wasn't up yet when ns3 attempted the zone transfer. - statistics: The TCP4SendErr counter is incremented when a TCP dispatch is canceled while sending. depending on test timing, this may have happened by the time the statistics are dumped. worked around by ignoring that stat counter when checking for errors. - hooks: Add a prereq.sh script to prevent running under TSAN. - zero: Disabled the servfail cache so that SERVFAIL is reported only when there actually is a failure, not repeatedly every time the same query is sent.
2021-09-24 15:45:57 -07:00
DIGCMD="$DIG +tcp -p ${PORT}"
2018-02-20 15:43:27 -08:00
RNDCCMD="$RNDC -p ${CONTROLPORT} -c ../common/rndc.conf"
status=0
ret=0
n=1
2021-07-21 17:09:53 +10:00
stats=0
nsock0nstat=0
nsock1nstat=0
2021-07-21 17:09:53 +10:00
# Ask the server at address $2 to dump its statistics ("rndc stats") and
# move the resulting $1/named.stats to a uniquely named file in the
# current directory.
#
# Arguments: $1 - server directory that receives named.stats (e.g. ns2)
#            $2 - address passed to rndc via -s (e.g. 10.53.0.2)
# Globals:   RNDCCMD (read), n (read), stats (read, incremented on success),
#            last_stats (set to the name of the saved dump)
# Returns:   0 on success; 1 if rndc fails, no dump file exists, or the
#            dump cannot be moved into place
rndc_stats() {
  _ns=$1
  _ip=$2

  # RNDCCMD carries its own options, so it is deliberately left unquoted.
  $RNDCCMD -s "$_ip" stats > /dev/null 2>&1 || return 1
  [ -f "${_ns}/named.stats" ] || return 1

  last_stats=named.stats.$_ns-$stats-$n
  # Report a failed move instead of returning success with last_stats
  # naming a file that was never created.
  mv "${_ns}/named.stats" "$last_stats" || return 1
  stats=$((stats + 1))
}
2018-02-20 15:43:27 -08:00
echo_i "fetching a.example from ns2's initial configuration ($n)"
$DIGCMD +noauth a.example. @10.53.0.2 any > dig.out.ns2.1 || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "dumping initial stats for ns2 ($n)"
2021-07-21 17:09:53 +10:00
rndc_stats ns2 10.53.0.2 || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "verifying adb records in named.stats ($n)"
2021-07-21 17:09:53 +10:00
grep "ADB stats" $last_stats > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
# Start from a clean result so that a failure of the previous check is not
# reported (and added to status) a second time by this one.
ret=0
echo_i "checking for 1 entry in adb hash table in named.stats ($n)"
grep "1 Addresses in hash table" "$last_stats" > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "verifying cache statistics in named.stats ($n)"
2021-07-21 17:09:53 +10:00
grep "Cache Statistics" $last_stats > /dev/null || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "checking for 2 entries in adb hash table in named.stats ($n)"
# Query a second name through ns2 before dumping its statistics again.
# A failed query is recorded as a test failure; left unguarded it would
# abort the whole script under 'set -e' before the result was reported.
$DIGCMD a.example.info. @10.53.0.2 any > /dev/null 2>&1 || ret=1
rndc_stats ns2 10.53.0.2 || ret=1
grep "2 Addresses in hash table" "$last_stats" > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "dumping initial stats for ns3 ($n)"
2021-07-21 17:09:53 +10:00
rndc_stats ns3 10.53.0.3 || ret=1
nsock0nstat=`grep "UDP/IPv4 sockets active" $last_stats | awk '{print $1}'`
[ 0 -ne ${nsock0nstat} ] || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
2018-02-20 15:43:27 -08:00
echo_i "sending queries to ns3"
# The checks that follow look for this query still being in progress on
# ns3, so dig is presumably expected to give up (+tries=2 +time=1) and exit
# non-zero.  Ignore its status so that 'set -e' does not abort the script.
$DIGCMD +tries=2 +time=1 +recurse @10.53.0.3 foo.info. any > /dev/null 2>&1 || true
ret=0
2018-02-20 15:43:27 -08:00
echo_i "dumping updated stats for ns3 ($n)"
# Dump fresh statistics for ns3 and succeed only when that dump reports
# "2 recursing clients".  Returns 1 if the dump cannot be obtained or the
# line is absent, so the caller can retry.
getstats() {
  if rndc_stats ns3 10.53.0.3 &&
    grep "2 recursing clients" $last_stats > /dev/null; then
    return 0
  fi
  return 1
}
retry_quiet 5 getstats || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "verifying recursing clients output in named.stats ($n)"
2021-07-21 17:09:53 +10:00
grep "2 recursing clients" $last_stats > /dev/null || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "verifying active fetches output in named.stats ($n)"
2021-07-21 17:09:53 +10:00
grep "1 active fetches" $last_stats > /dev/null || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "verifying active sockets output in named.stats ($n)"
# First field of the "UDP/IPv4 sockets active" line in the latest dump.
nsock1nstat=$(grep "UDP/IPv4 sockets active" $last_stats | awk '{print $1}')
# The count must be exactly one higher than the value held in nsock0nstat.
if ! [ $((nsock1nstat - nsock0nstat)) -eq 1 ]; then
  ret=1
fi
if [ "$ret" -ne 0 ]; then
  echo_i "failed"
fi
status=$((status + ret))
n=$((n + 1))
# there should be 1 UDP and no TCP queries. As the TCP counter is zero
# no status line is emitted.
ret=0
2018-02-20 15:43:27 -08:00
echo_i "verifying queries in progress in named.stats ($n)"
2021-07-21 17:09:53 +10:00
grep "1 UDP queries in progress" $last_stats > /dev/null || ret=1
grep "TCP queries in progress" $last_stats > /dev/null && ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "verifying bucket size output ($n)"
2021-07-21 17:09:53 +10:00
grep "bucket size" $last_stats > /dev/null || ret=1
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
echo_i "checking priming queries are counted ($n)"
# Pass only if the latest statistics dump mentions priming queries.
if ! grep "priming queries" $last_stats > /dev/null; then
  ret=1
fi
if [ "$ret" -ne 0 ]; then
  echo_i "failed"
fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "checking that zones with slash are properly shown in XML output ($n)"
# CURL is quoted so that an unset or empty value makes the -x test fail and
# the check is skipped; unquoted, '[ -x ]' collapses to a one-argument test
# that is always true.
if $FEATURETEST --have-libxml2 && [ -x "${CURL}" ]; then
  "${CURL}" "http://10.53.0.1:${EXTRAPORT1}/xml/v3/zones" > "curl.out.${n}" 2> /dev/null || ret=1
  grep '<zone name="32/1.0.0.127-in-addr.example" rdataclass="IN">' "curl.out.${n}" > /dev/null || ret=1
else
  echo_i "skipping test as libxml2 and/or curl was not found"
fi
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
ret=0
2018-02-20 15:43:27 -08:00
echo_i "checking that zones return their type ($n)"
# CURL is quoted so that an unset or empty value makes the -x test fail and
# the check is skipped; unquoted, '[ -x ]' collapses to a one-argument test
# that is always true.
if $FEATURETEST --have-libxml2 && [ -x "${CURL}" ]; then
  "${CURL}" "http://10.53.0.1:${EXTRAPORT1}/xml/v3/zones" > "curl.out.${n}" 2> /dev/null || ret=1
  grep '<zone name="32/1.0.0.127-in-addr.example" rdataclass="IN"><type>primary</type>' "curl.out.${n}" > /dev/null || ret=1
else
  echo_i "skipping test as libxml2 and/or curl was not found"
fi
2018-02-20 15:43:27 -08:00
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
2019-05-20 15:24:03 +10:00
ret=0
echo_i "checking bind9.xsl vs xml ($n)"
if $FEATURETEST --have-libxml2 && "${CURL}" --http1.1 http://10.53.0.3:${EXTRAPORT1} > /dev/null 2>&1 && [ -x "${XSLTPROC}" ] ; then
Corrected several system test issues - serve-stale: dig wasn't always running in background when it should. some of the serve-stale test cases are based on groups of dig calls running simultaneously in the background: the test pauses and resumes running after 'wait'. in some cases the final call to dig in a group wasn't in the background, and this sometimes caused delays that affected later test results. in another case, a test was simplified and made more reliable by running dig in the foreground removing a sleep. - serve-stale: The extension of the dig timeout period from 10 to 11 seconds in commit 5307bf64ce80 was left undone in a few places and has now been completed. - serve-stale: Resolver-query-timeout was set incorrectly. a comment above a test case in serve-stale/tests.sh says: "We configured a long value of 30 seconds for resolver-query-timeout," but resolver-query-timeout was actually set to 10, not 30. this is now fixed. - rpz: Force retransfer of the fast-expire zone, to ensure it's fully loaded in ns3; previously it could have been left unloaded if ns5 wasn't up yet when ns3 attempted the zone transfer. - statistics: The TCP4SendErr counter is incremented when a TCP dispatch is canceled while sending. depending on test timing, this may have happened by the time the statistics are dumped. worked around by ignoring that stat couunter when checking for errors. - hooks: Add a prereq.sh script to prevent running under TSAN. - zero: Disabled the servfail cache so that SERVFAIL is reported only when there actually is a failure, not repeatedly every time the same query is sent.
2021-09-24 15:45:57 -07:00
$DIGCMD +notcp +recurse @10.53.0.3 soa . > dig.out.test$n.1 2>&1
$DIGCMD +notcp +recurse @10.53.0.3 soa example > dig.out.test$n.2 2>&1
# check multiple requests over the same socket
time1=$($PERL -e 'print time(), "\n";')
${CURL} --http1.1 -o curl.out.${n}.xml http://10.53.0.3:${EXTRAPORT1}/xml/v3 \
-o curl.out.${n}.xsl http://10.53.0.3:${EXTRAPORT1}/bind9.xsl 2>/dev/null || ret=1
time2=$($PERL -e 'print time(), "\n";')
test $((time2 - time1)) -lt 5 || ret=1
diff ${TOP_SRCDIR}/bin/named/bind9.xsl curl.out.${n}.xsl || ret=1
2019-05-20 15:24:03 +10:00
${XSLTPROC} curl.out.${n}.xsl - < curl.out.${n}.xml > xsltproc.out.${n} 2>/dev/null || ret=1
cp curl.out.${n}.xml stats.xml.out || ret=1
2019-05-20 15:24:03 +10:00
#
# grep for expected sections.
#
grep "<h1>ISC Bind 9 Configuration and Statistics</h1>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Server Status</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Incoming Requests by DNS Opcode</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>Incoming Queries by Query Type</h3>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Outgoing Queries per view</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>View " xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Server Statistics</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Zone Maintenance Statistics</h2>" xsltproc.out.${n} >/dev/null || ret=1
Corrected several system test issues - serve-stale: dig wasn't always running in background when it should. some of the serve-stale test cases are based on groups of dig calls running simultaneously in the background: the test pauses and resumes running after 'wait'. in some cases the final call to dig in a group wasn't in the background, and this sometimes caused delays that affected later test results. in another case, a test was simplified and made more reliable by running dig in the foreground removing a sleep. - serve-stale: The extension of the dig timeout period from 10 to 11 seconds in commit 5307bf64ce80 was left undone in a few places and has now been completed. - serve-stale: Resolver-query-timeout was set incorrectly. a comment above a test case in serve-stale/tests.sh says: "We configured a long value of 30 seconds for resolver-query-timeout," but resolver-query-timeout was actually set to 10, not 30. this is now fixed. - rpz: Force retransfer of the fast-expire zone, to ensure it's fully loaded in ns3; previously it could have been left unloaded if ns5 wasn't up yet when ns3 attempted the zone transfer. - statistics: The TCP4SendErr counter is incremented when a TCP dispatch is canceled while sending. depending on test timing, this may have happened by the time the statistics are dumped. worked around by ignoring that stat couunter when checking for errors. - hooks: Add a prereq.sh script to prevent running under TSAN. - zero: Disabled the servfail cache so that SERVFAIL is reported only when there actually is a failure, not repeatedly every time the same query is sent.
2021-09-24 15:45:57 -07:00
# grep "<h2>Resolver Statistics (Common)</h2>" xsltproc.out.${n} >/dev/null || ret=1
2019-05-20 15:24:03 +10:00
grep "<h3>Resolver Statistics for View " xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>ADB Statistics for View " xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>Cache Statistics for View " xsltproc.out.${n} >/dev/null || ret=1
# grep "<h3>Cache DB RRsets for View " xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Traffic Size Statistics</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>UDP Requests Received</h4>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>UDP Responses Sent</h4>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>TCP Requests Received</h4>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>TCP Responses Sent</h4>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Socket I/O Statistics</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>Zones for View " xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Received QTYPES per view/zone</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>View _default" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>Zone example" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Response Codes per view/zone</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>View _default" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>Zone example" xsltproc.out.${n} >/dev/null || ret=1
# grep "<h2>Glue cache statistics</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h3>View _default" xsltproc.out.${n} >/dev/null || ret=1
grep "<h4>Zone example" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Memory Usage Summary</h2>" xsltproc.out.${n} >/dev/null || ret=1
grep "<h2>Memory Contexts</h2>" xsltproc.out.${n} >/dev/null || ret=1
else
echo_i "skipping test as libxml2 and/or curl with HTTP/1.1 support and/or xsltproc was not found"
2019-05-20 15:24:03 +10:00
fi
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
2019-05-20 15:24:03 +10:00
ret=0
echo_i "checking bind9.xml socket statistics ($n)"
if $FEATURETEST --have-libxml2 && [ -e stats.xml.out ] && [ -x "${XSLTPROC}" ] ; then
# Socket statistics (expect no errors)
grep "<counter name=\"TCP4AcceptFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP4BindFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP4ConnFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP4OpenFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP4RecvErr\">0</counter>" stats.xml.out >/dev/null || ret=1
Corrected several system test issues - serve-stale: dig wasn't always running in background when it should. some of the serve-stale test cases are based on groups of dig calls running simultaneously in the background: the test pauses and resumes running after 'wait'. in some cases the final call to dig in a group wasn't in the background, and this sometimes caused delays that affected later test results. in another case, a test was simplified and made more reliable by running dig in the foreground removing a sleep. - serve-stale: The extension of the dig timeout period from 10 to 11 seconds in commit 5307bf64ce80 was left undone in a few places and has now been completed. - serve-stale: Resolver-query-timeout was set incorrectly. a comment above a test case in serve-stale/tests.sh says: "We configured a long value of 30 seconds for resolver-query-timeout," but resolver-query-timeout was actually set to 10, not 30. this is now fixed. - rpz: Force retransfer of the fast-expire zone, to ensure it's fully loaded in ns3; previously it could have been left unloaded if ns5 wasn't up yet when ns3 attempted the zone transfer. - statistics: The TCP4SendErr counter is incremented when a TCP dispatch is canceled while sending. depending on test timing, this may have happened by the time the statistics are dumped. worked around by ignoring that stat couunter when checking for errors. - hooks: Add a prereq.sh script to prevent running under TSAN. - zero: Disabled the servfail cache so that SERVFAIL is reported only when there actually is a failure, not repeatedly every time the same query is sent.
2021-09-24 15:45:57 -07:00
# grep "<counter name=\"TCP4SendErr\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6AcceptFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6BindFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6ConnFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6OpenFail\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6RecvErr\">0</counter>" stats.xml.out >/dev/null || ret=1
grep "<counter name=\"TCP6SendErr\">0</counter>" stats.xml.out >/dev/null || ret=1
else
echo_i "skipping test as libxml2 and/or stats.xml.out file and/or xsltproc was not found"
fi
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
echo_i "Check that 'zone-statistics full;' is processed by 'rndc reconfig' ($n)"
ret=0
# Each step dumps ns2's statistics and looks for an "[example]" entry inside
# the "Per Zone Query Statistics" section of the dump.  With the feature off
# the entry must be absent; with it on the entry must be present.
#
# off by default: finding the entry here is a failure
rndc_stats ns2 10.53.0.2 || ret=1
if sed -n '/Per Zone Query Statistics/,/^++/p' "$last_stats" | grep -F '[example]' > /dev/null; then
  ret=1
fi
# turn on
copy_setports ns2/named2.conf.in ns2/named.conf
rndc_reconfig ns2 10.53.0.2
rndc_stats ns2 10.53.0.2 || ret=1
sed -n '/Per Zone Query Statistics/,/^++/p' "$last_stats" | grep -F '[example]' > /dev/null || ret=1
# turn off: the entry must disappear again
copy_setports ns2/named.conf.in ns2/named.conf
rndc_reconfig ns2 10.53.0.2
rndc_stats ns2 10.53.0.2 || ret=1
if sed -n '/Per Zone Query Statistics/,/^++/p' "$last_stats" | grep -F '[example]' > /dev/null; then
  ret=1
fi
# turn on
copy_setports ns2/named2.conf.in ns2/named.conf
rndc_reconfig ns2 10.53.0.2
rndc_stats ns2 10.53.0.2 || ret=1
sed -n '/Per Zone Query Statistics/,/^++/p' "$last_stats" | grep -F '[example]' > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
2018-02-20 15:43:27 -08:00
echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1