2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 14:07:59 +00:00

adjust system tests to deal with possible timing issues

With the netmgr in use, named may start answering queries before zones
are loaded. This can cause transient failures in system tests after
servers are restarted or reconfigured. This commit adds retry loops
and sleep statements where needed to address this problem.

Also incidentally silenced a clang warning.
This commit is contained in:
Evan Hunt
2019-11-05 16:14:06 -08:00
parent b9a5508e52
commit 24510a1fda
18 changed files with 105 additions and 45 deletions

View File

@@ -696,11 +696,17 @@ $RNDCCMD 10.53.0.3 addzone "test4.baz" '{ type master; file "e.db"; };' > /dev/n
$RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1
$PERL $SYSTEMTESTTOP/stop.pl addzone ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1
$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || ret=1
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
n=`expr $n + 1`
for try in 0 1 2 3 4 5 6 7 8 9; do
iret=0
$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || iret=1
grep "status: NOERROR" dig.out.test$n > /dev/null || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -ne 0 ] && ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1

View File

@@ -124,13 +124,13 @@ add_name(struct dlz_example_data *state, struct record *list,
strlen(data) >= sizeof(list[i].data))
return (ISC_R_NOSPACE);
strncpy(list[i].name, name, sizeof(list[i].name));
strncpy(list[i].name, name, sizeof(list[i].name) - 1);
list[i].name[sizeof(list[i].name) - 1] = '\0';
strncpy(list[i].type, type, sizeof(list[i].type));
strncpy(list[i].type, type, sizeof(list[i].type) - 1);
list[i].type[sizeof(list[i].type) - 1] = '\0';
strncpy(list[i].data, data, sizeof(list[i].data));
strncpy(list[i].data, data, sizeof(list[i].data) - 1);
list[i].data[sizeof(list[i].data) - 1] = '\0';
list[i].ttl = ttl;

View File

@@ -20,7 +20,6 @@ rm -f dig.out.*
DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300"
# Check the example. domain
echo "I:checking that positive validation works ($n)"
ret=0
$DIG $DIGOPTS . @10.53.0.1 soa > dig.out.ns1.test$n || ret=1

View File

@@ -98,10 +98,15 @@ status=`expr $status + $ret`
echo_i "checking that forward only zone overrides empty zone"
# Retry loop in case the server restart above causes transient failure.
# ret is reset at the top of every attempt; if it were only initialised
# once before the loop, a single failed attempt would leave ret=1 for all
# remaining iterations and the loop could never break out successfully.
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    # 1.0.10.in-addr.arpa must answer NOERROR ...
    $DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2
    grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1
    # ... while 2.0.10.in-addr.arpa must answer NXDOMAIN.
    $DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2
    grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1
    [ "$ret" -eq 0 ] && break
    sleep 1
done
# ret now holds the outcome of the last attempt made.
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`

View File

@@ -14,6 +14,7 @@ rm -f ns*/named.run
rm -f ns*/named.lock
# build.sh
rm -f ns1/named_dump.db*
rm -f ns6/K*
rm -f ns6/dsset-*
rm -f ns6/edns512.db

View File

@@ -259,8 +259,13 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} legacy ns1
n=$(expr $n + 1)
echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)"
# The server was restarted just above, so the first few attempts may fail
# transiently; try up to ten times, pausing one second after each failure.
for try in 0 1 2 3 4 5 6 7 8 9; do
    if resolution_fails edns512-notcp.; then
        ret=0
        break
    fi
    ret=1
    sleep 1
done
# Report and accumulate the result of the last attempt.
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)

View File

@@ -506,7 +506,6 @@ grep "add nsec3param.test. 0 IN TYPE65534 .# 6 000140000400" jp.out.ns3.$n > /de
if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi
ret=0
echo_i "testing that rndc stop updates the master file"
$NSUPDATE -k ns1/ddns.key <<END > /dev/null || ret=1
@@ -514,16 +513,24 @@ server 10.53.0.1 ${PORT}
update add updated4.example.nil. 600 A 10.10.10.3
send
END
sleep 3
$PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1
sleep 3
# Removing the journal file and restarting the server means
# that the data served by the new server process are exactly
# those dumped to the master file by "rndc stop".
rm -f ns1/*jnl
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1
$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd updated4.example.nil.\
@10.53.0.1 a > dig.out.ns1 || status=1
digcomp knowngood.ns1.afterstop dig.out.ns1 || ret=1
[ $ret = 0 ] || { echo_i "failed"; status=1; }
for try in 0 1 2 3 4 5 6 7 8 9; do
iret=0
$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd \
updated4.example.nil. @10.53.0.1 a > dig.out.ns1 || iret=1
digcomp knowngood.ns1.afterstop dig.out.ns1 || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -ne 0 ] && ret=1
[ "$ret" -eq 0 ] || { echo_i "failed"; status=1; }
ret=0
echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file"

View File

@@ -61,9 +61,14 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nzd2nzf ns1
n=$(expr $n + 1)
echo_i "querying for zone data from migrated zone config ($n)"
# The server restart above can cause transient failures, so the query is
# attempted up to ten times with a one-second pause after each failure.
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    # Both the dig invocation and the NOERROR status check must succeed.
    $DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1
    grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1
    if [ "$ret" -eq 0 ]; then
        break
    fi
    sleep 1
done
# NOTE(review): n is incremented both before and after this test, so a
# test number is skipped here -- confirm this is intended.
n=$(expr $n + 1)
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)

View File

@@ -12,7 +12,7 @@
// NS7
options {
query-source address 10.53.0.7 port @PORT@ dscp 13;
query-source address 10.53.0.7 dscp 13;
notify-source 10.53.0.7 dscp 14;
transfer-source 10.53.0.7 dscp 15;
port @PORT@;

View File

@@ -219,6 +219,7 @@ restart () {
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1
load_db
dnsrps_loaded
sleep 1
}
# $1=server and irrelevant args
@@ -465,6 +466,7 @@ for mode in native dnsrps; do
else
echo_i "running DNSRPS sub-test"
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz
sleep 3
fi
;;
esac

View File

@@ -135,6 +135,7 @@ for mode in native dnsrps; do
else
echo_i "running DNSRPS sub-test"
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse
sleep 3
fi
;;
esac

View File

@@ -71,7 +71,7 @@ $RNDCCMD -s 10.53.0.3 stats > /dev/null 2>&1
[ -f ns3/named.stats ] || ret=1
if [ ! "$CYGWIN" ]; then
nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'`
[ 0 -ne ${nsock0nstat:-0} ] || ret=1
[ 0 -eq ${nsock0nstat:-0} ] || ret=1
fi
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
@@ -107,7 +107,7 @@ if [ ! "$CYGWIN" ]; then
ret=0
echo_i "verifying active sockets output in named.stats ($n)"
nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'`
[ `expr $nsock1nstat - $nsock0nstat` -eq 1 ] || ret=1
[ `expr ${nsock1nstat:-0} - ${nsock0nstat:-0}` -eq 1 ] || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`

View File

@@ -42,7 +42,7 @@ import time
# Timeout for establishing all connections requested by a single 'open' command.
OPEN_TIMEOUT = 2
VERSION_QUERY = b'\x00\x1e\xaf\xb8\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07version\x04bind\x00\x00\x10\x00\x03'
def log(msg):
print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg)
@@ -84,6 +84,7 @@ def open_connections(active_conns, count, host, port):
log('%s for socket %s' % (errno.errorcode[err], sock))
errors.append(sock)
else:
sock.send(VERSION_QUERY)
active_conns.append(sock)
if errors:

View File

@@ -163,8 +163,12 @@ check_stats_limit() {
assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1
}
retry 2 check_stats_limit || ret=1
close_connections $((TCP_LIMIT + 1))
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# wait for connections to close
sleep 5
echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1

View File

@@ -122,16 +122,24 @@ do
done
echo_i "checking large unknown record loading on master"
# Query 10.53.0.1 up to ten times; an attempt passes only when dig
# succeeds and its output matches large.out.
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    $DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || {
        ret=1
        echo_i "dig failed"
    }
    $DIFF -s large.out dig.out > /dev/null || {
        ret=1
        echo_i "$DIFF failed"
    }
    if [ "$ret" -eq 0 ]; then
        break
    fi
    sleep 1
done
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)

echo_i "checking large unknown record loading on slave"
# Same check against 10.53.0.2.
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || {
        ret=1
        echo_i "dig failed"
    }
    $DIFF -s large.out dig.out > /dev/null || {
        ret=1
        echo_i "$DIFF failed"
    }
    if [ "$ret" -eq 0 ]; then
        break
    fi
    sleep 1
done
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)
@@ -139,10 +147,16 @@ echo_i "stop and restart slave"
$PERL $SYSTEMTESTTOP/stop.pl unknown ns2
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2
# After the restart the server may answer queries before its zones are
# loaded, so the check is repeated (up to ten attempts, one second apart)
# until it passes.
echo_i "checking large unknown record loading on slave"
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || {
        ret=1
        echo_i "dig failed"
    }
    $DIFF -s large.out dig.out > /dev/null || {
        ret=1
        echo_i "$DIFF failed"
    }
    if [ "$ret" -eq 0 ]; then
        break
    fi
    sleep 1
done
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)
@@ -157,10 +171,16 @@ echo_i "stop and restart inline slave"
$PERL $SYSTEMTESTTOP/stop.pl unknown ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3
# After the restart the server may answer queries before its zones are
# loaded, so the check is repeated (up to ten attempts, one second apart)
# until it passes.
echo_i "checking large unknown record loading on inline slave"
for try in 0 1 2 3 4 5 6 7 8 9; do
    ret=0
    $DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || {
        ret=1
        echo_i "dig failed"
    }
    $DIFF large.out dig.out > /dev/null || {
        ret=1
        echo_i "$DIFF failed"
    }
    if [ "$ret" -eq 0 ]; then
        break
    fi
    sleep 1
done
if [ "$ret" != 0 ]; then
    echo_i "failed"
fi
status=$(expr $status + $ret)

View File

@@ -17,7 +17,7 @@ options {
pid-file "named.pid";
listen-on { 10.53.0.3; };
listen-on-v6 { none; };
recursion yes;
recursion no;
notify yes;
};

View File

@@ -21,8 +21,6 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}"
status=0
n=1
sleep 5
echo_i "waiting for servers to be ready for testing ($n)"
for i in 1 2 3 4 5 6 7 8 9 10
do

View File

@@ -431,11 +431,17 @@ $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.1.$n
grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1
$PERL $SYSTEMTESTTOP/stop.pl xfer ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3
for try in 0 1 2 3 4 5 6 7 8 9; do
iret=0
$DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n
grep "status: NOERROR," dig.out.2.$n > /dev/null || tmp=1
grep "status: NOERROR," dig.out.2.$n > /dev/null || iret=1
$DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n
digcomp knowngood.mapped dig.out.3.$n || tmp=1
if test $tmp != 0 ; then echo_i "failed"; fi
digcomp knowngood.mapped dig.out.3.$n || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -eq 0 ] || tmp=1
[ "$tmp" -ne 0 ] && echo_i "failed"
status=`expr $status + $tmp`
n=`expr $n + 1`