2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 14:35:26 +00:00

adjust system tests to deal with possible timing issues

With the netmgr in use, named may start answering queries before its zones
have finished loading. This can cause transient failures in system tests
after servers are restarted or reconfigured. This commit adds retry loops
and sleep statements where needed to address the problem.

Also incidentally silenced a clang warning.
This commit is contained in:
Evan Hunt
2019-11-05 16:14:06 -08:00
parent b9a5508e52
commit 24510a1fda
18 changed files with 105 additions and 45 deletions

View File

@@ -696,11 +696,17 @@ $RNDCCMD 10.53.0.3 addzone "test4.baz" '{ type master; file "e.db"; };' > /dev/n
$RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1 $RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1
$PERL $SYSTEMTESTTOP/stop.pl addzone ns3 $PERL $SYSTEMTESTTOP/stop.pl addzone ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1
$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || ret=1 for try in 0 1 2 3 4 5 6 7 8 9; do
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 iret=0
n=`expr $n + 1` $DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || iret=1
grep "status: NOERROR" dig.out.test$n > /dev/null || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -ne 0 ] && ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "exit status: $status" echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1 [ $status -eq 0 ] || exit 1

View File

@@ -124,13 +124,13 @@ add_name(struct dlz_example_data *state, struct record *list,
strlen(data) >= sizeof(list[i].data)) strlen(data) >= sizeof(list[i].data))
return (ISC_R_NOSPACE); return (ISC_R_NOSPACE);
strncpy(list[i].name, name, sizeof(list[i].name)); strncpy(list[i].name, name, sizeof(list[i].name) - 1);
list[i].name[sizeof(list[i].name) - 1] = '\0'; list[i].name[sizeof(list[i].name) - 1] = '\0';
strncpy(list[i].type, type, sizeof(list[i].type)); strncpy(list[i].type, type, sizeof(list[i].type) - 1);
list[i].type[sizeof(list[i].type) - 1] = '\0'; list[i].type[sizeof(list[i].type) - 1] = '\0';
strncpy(list[i].data, data, sizeof(list[i].data)); strncpy(list[i].data, data, sizeof(list[i].data) - 1);
list[i].data[sizeof(list[i].data) - 1] = '\0'; list[i].data[sizeof(list[i].data) - 1] = '\0';
list[i].ttl = ttl; list[i].ttl = ttl;

View File

@@ -20,7 +20,6 @@ rm -f dig.out.*
DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300" DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300"
# Check the example. domain # Check the example. domain
echo "I:checking that positive validation works ($n)" echo "I:checking that positive validation works ($n)"
ret=0 ret=0
$DIG $DIGOPTS . @10.53.0.1 soa > dig.out.ns1.test$n || ret=1 $DIG $DIGOPTS . @10.53.0.1 soa > dig.out.ns1.test$n || ret=1

View File

@@ -98,10 +98,15 @@ status=`expr $status + $ret`
echo_i "checking that forward only zone overrides empty zone" echo_i "checking that forward only zone overrides empty zone"
ret=0 ret=0
$DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 # retry loop in case the server restart above causes transient failure
grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 for try in 0 1 2 3 4 5 6 7 8 9; do
$DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 $DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2
grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1
$DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2
grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1
[ "$ret" -eq 0 ] && break
sleep 1
done
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`

View File

@@ -14,6 +14,7 @@ rm -f ns*/named.run
rm -f ns*/named.lock rm -f ns*/named.lock
# build.sh # build.sh
rm -f ns1/named_dump.db*
rm -f ns6/K* rm -f ns6/K*
rm -f ns6/dsset-* rm -f ns6/dsset-*
rm -f ns6/edns512.db rm -f ns6/edns512.db

View File

@@ -259,8 +259,13 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} legacy ns1
n=`expr $n + 1` n=`expr $n + 1`
echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)" echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)"
ret=0 # retry loop in case the server restart above causes transient failure
resolution_fails edns512-notcp. || ret=1 for try in 0 1 2 3 4 5 6 7 8 9; do
ret=0
resolution_fails edns512-notcp. || ret=1
[ "$ret" -eq 0 ] && break
sleep 1
done
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`

View File

@@ -506,7 +506,6 @@ grep "add nsec3param.test. 0 IN TYPE65534 .# 6 000140000400" jp.out.ns3.$n > /de
if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi
ret=0 ret=0
echo_i "testing that rndc stop updates the master file" echo_i "testing that rndc stop updates the master file"
$NSUPDATE -k ns1/ddns.key <<END > /dev/null || ret=1 $NSUPDATE -k ns1/ddns.key <<END > /dev/null || ret=1
@@ -514,16 +513,24 @@ server 10.53.0.1 ${PORT}
update add updated4.example.nil. 600 A 10.10.10.3 update add updated4.example.nil. 600 A 10.10.10.3
send send
END END
sleep 3
$PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1 $PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1
sleep 3
# Removing the journal file and restarting the server means # Removing the journal file and restarting the server means
# that the data served by the new server process are exactly # that the data served by the new server process are exactly
# those dumped to the master file by "rndc stop". # those dumped to the master file by "rndc stop".
rm -f ns1/*jnl rm -f ns1/*jnl
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1
$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd updated4.example.nil.\ for try in 0 1 2 3 4 5 6 7 8 9; do
@10.53.0.1 a > dig.out.ns1 || status=1 iret=0
digcomp knowngood.ns1.afterstop dig.out.ns1 || ret=1 $DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd \
[ $ret = 0 ] || { echo_i "failed"; status=1; } updated4.example.nil. @10.53.0.1 a > dig.out.ns1 || iret=1
digcomp knowngood.ns1.afterstop dig.out.ns1 || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -ne 0 ] && ret=1
[ "$ret" -eq 0 ] || { echo_i "failed"; status=1; }
ret=0 ret=0
echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file" echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file"

View File

@@ -61,9 +61,14 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nzd2nzf ns1
n=`expr $n + 1` n=`expr $n + 1`
echo_i "querying for zone data from migrated zone config ($n)" echo_i "querying for zone data from migrated zone config ($n)"
ret=0 # retry loop in case the server restart above causes transient failures
$DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 for try in 0 1 2 3 4 5 6 7 8 9; do
grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 ret=0
$DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1
grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1
[ "$ret" -eq 0 ] && break
sleep 1
done
n=`expr $n + 1` n=`expr $n + 1`
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`

View File

@@ -12,7 +12,7 @@
// NS7 // NS7
options { options {
query-source address 10.53.0.7 port @PORT@ dscp 13; query-source address 10.53.0.7 dscp 13;
notify-source 10.53.0.7 dscp 14; notify-source 10.53.0.7 dscp 14;
transfer-source 10.53.0.7 dscp 15; transfer-source 10.53.0.7 dscp 15;
port @PORT@; port @PORT@;

View File

@@ -219,6 +219,7 @@ restart () {
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1
load_db load_db
dnsrps_loaded dnsrps_loaded
sleep 1
} }
# $1=server and irrelevant args # $1=server and irrelevant args
@@ -465,6 +466,7 @@ for mode in native dnsrps; do
else else
echo_i "running DNSRPS sub-test" echo_i "running DNSRPS sub-test"
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz
sleep 3
fi fi
;; ;;
esac esac

View File

@@ -135,6 +135,7 @@ for mode in native dnsrps; do
else else
echo_i "running DNSRPS sub-test" echo_i "running DNSRPS sub-test"
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse
sleep 3
fi fi
;; ;;
esac esac

View File

@@ -71,7 +71,7 @@ $RNDCCMD -s 10.53.0.3 stats > /dev/null 2>&1
[ -f ns3/named.stats ] || ret=1 [ -f ns3/named.stats ] || ret=1
if [ ! "$CYGWIN" ]; then if [ ! "$CYGWIN" ]; then
nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'`
[ 0 -ne ${nsock0nstat:-0} ] || ret=1 [ 0 -eq ${nsock0nstat:-0} ] || ret=1
fi fi
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`
@@ -107,7 +107,7 @@ if [ ! "$CYGWIN" ]; then
ret=0 ret=0
echo_i "verifying active sockets output in named.stats ($n)" echo_i "verifying active sockets output in named.stats ($n)"
nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'`
[ `expr $nsock1nstat - $nsock0nstat` -eq 1 ] || ret=1 [ `expr ${nsock1nstat:-0} - ${nsock0nstat:-0}` -eq 1 ] || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret` status=`expr $status + $ret`
n=`expr $n + 1` n=`expr $n + 1`

View File

@@ -42,7 +42,7 @@ import time
# Timeout for establishing all connections requested by a single 'open' command. # Timeout for establishing all connections requested by a single 'open' command.
OPEN_TIMEOUT = 2 OPEN_TIMEOUT = 2
VERSION_QUERY = b'\x00\x1e\xaf\xb8\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07version\x04bind\x00\x00\x10\x00\x03'
def log(msg): def log(msg):
print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg) print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg)
@@ -84,6 +84,7 @@ def open_connections(active_conns, count, host, port):
log('%s for socket %s' % (errno.errorcode[err], sock)) log('%s for socket %s' % (errno.errorcode[err], sock))
errors.append(sock) errors.append(sock)
else: else:
sock.send(VERSION_QUERY)
active_conns.append(sock) active_conns.append(sock)
if errors: if errors:

View File

@@ -163,8 +163,12 @@ check_stats_limit() {
assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1 assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1
} }
retry 2 check_stats_limit || ret=1 retry 2 check_stats_limit || ret=1
close_connections $((TCP_LIMIT + 1))
if [ $ret != 0 ]; then echo_i "failed"; fi if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret)) status=$((status + ret))
# wait for connections to close
sleep 5
echo_i "exit status: $status" echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1 [ $status -eq 0 ] || exit 1

View File

@@ -122,16 +122,24 @@ do
done done
echo_i "checking large unknown record loading on master" echo_i "checking large unknown record loading on master"
ret=0 for try in 0 1 2 3 4 5 6 7 8 9; do
$DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } ret=0
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } $DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; }
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; }
[ "$ret" -eq 0 ] && break
sleep 1
done
[ $ret = 0 ] || echo_i "failed" [ $ret = 0 ] || echo_i "failed"
status=`expr $status + $ret` status=`expr $status + $ret`
echo_i "checking large unknown record loading on slave" echo_i "checking large unknown record loading on slave"
ret=0 for try in 0 1 2 3 4 5 6 7 8 9; do
$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } ret=0
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; }
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; }
[ "$ret" -eq 0 ] && break
sleep 1
done
[ $ret = 0 ] || echo_i "failed" [ $ret = 0 ] || echo_i "failed"
status=`expr $status + $ret` status=`expr $status + $ret`
@@ -139,10 +147,16 @@ echo_i "stop and restart slave"
$PERL $SYSTEMTESTTOP/stop.pl unknown ns2 $PERL $SYSTEMTESTTOP/stop.pl unknown ns2
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2
# server may be answering queries before zones are loaded,
# so retry a few times if this query fails
echo_i "checking large unknown record loading on slave" echo_i "checking large unknown record loading on slave"
ret=0 for try in 0 1 2 3 4 5 6 7 8 9; do
$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } ret=0
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; }
$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; }
[ "$ret" -eq 0 ] && break
sleep 1
done
[ $ret = 0 ] || echo_i "failed" [ $ret = 0 ] || echo_i "failed"
status=`expr $status + $ret` status=`expr $status + $ret`
@@ -157,10 +171,16 @@ echo_i "stop and restart inline slave"
$PERL $SYSTEMTESTTOP/stop.pl unknown ns3 $PERL $SYSTEMTESTTOP/stop.pl unknown ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3
# server may be answering queries before zones are loaded,
# so retry a few times if this query fails
echo_i "checking large unknown record loading on inline slave" echo_i "checking large unknown record loading on inline slave"
ret=0 for try in 0 1 2 3 4 5 6 7 8 9; do
$DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } ret=0
$DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } $DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; }
$DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; }
[ "$ret" -eq 0 ] && break
sleep 1
done
[ $ret = 0 ] || echo_i "failed" [ $ret = 0 ] || echo_i "failed"
status=`expr $status + $ret` status=`expr $status + $ret`

View File

@@ -17,7 +17,7 @@ options {
pid-file "named.pid"; pid-file "named.pid";
listen-on { 10.53.0.3; }; listen-on { 10.53.0.3; };
listen-on-v6 { none; }; listen-on-v6 { none; };
recursion yes; recursion no;
notify yes; notify yes;
}; };

View File

@@ -21,8 +21,6 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}"
status=0 status=0
n=1 n=1
sleep 5
echo_i "waiting for servers to be ready for testing ($n)" echo_i "waiting for servers to be ready for testing ($n)"
for i in 1 2 3 4 5 6 7 8 9 10 for i in 1 2 3 4 5 6 7 8 9 10
do do

View File

@@ -431,11 +431,17 @@ $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.1.$n
grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1 grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1
$PERL $SYSTEMTESTTOP/stop.pl xfer ns3 $PERL $SYSTEMTESTTOP/stop.pl xfer ns3
$PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3
$DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n for try in 0 1 2 3 4 5 6 7 8 9; do
grep "status: NOERROR," dig.out.2.$n > /dev/null || tmp=1 iret=0
$DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n
digcomp knowngood.mapped dig.out.3.$n || tmp=1 grep "status: NOERROR," dig.out.2.$n > /dev/null || iret=1
if test $tmp != 0 ; then echo_i "failed"; fi $DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n
digcomp knowngood.mapped dig.out.3.$n || iret=1
[ "$iret" -eq 0 ] && break
sleep 1
done
[ "$iret" -eq 0 ] || tmp=1
[ "$tmp" -ne 0 ] && echo_i "failed"
status=`expr $status + $tmp` status=`expr $status + $tmp`
n=`expr $n + 1` n=`expr $n + 1`