From 24510a1fda71d1c3c47eb2e69a9694ecf1fa2ef3 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Tue, 5 Nov 2019 16:14:06 -0800 Subject: [PATCH] adjust system tests to deal with possible timing issues With the netmgr in use, named may start answering queries before zones are loaded. This can cause transient failures in system tests after servers are restarted or reconfigured. This commit adds retry loops and sleep statements where needed to address this problem. Also incidentally silenced a clang warning. --- bin/tests/system/addzone/tests.sh | 12 ++++-- bin/tests/system/dlzexternal/driver.c | 6 +-- bin/tests/system/ecdsa/tests.sh | 1 - bin/tests/system/forward/tests.sh | 13 ++++-- bin/tests/system/legacy/clean.sh | 1 + bin/tests/system/legacy/tests.sh | 9 +++- bin/tests/system/nsupdate/tests.sh | 17 +++++--- bin/tests/system/nzd2nzf/tests.sh | 11 +++-- bin/tests/system/resolver/ns7/named2.conf.in | 2 +- bin/tests/system/rpz/tests.sh | 2 + bin/tests/system/rpzrecurse/tests.sh | 1 + bin/tests/system/statistics/tests.sh | 4 +- bin/tests/system/tcp/ans6/ans.py | 3 +- bin/tests/system/tcp/tests.sh | 4 ++ bin/tests/system/unknown/tests.sh | 44 ++++++++++++++------ bin/tests/system/upforwd/ns3/named.conf.in | 2 +- bin/tests/system/upforwd/tests.sh | 2 - bin/tests/system/xfer/tests.sh | 16 ++++--- 18 files changed, 105 insertions(+), 45 deletions(-) diff --git a/bin/tests/system/addzone/tests.sh b/bin/tests/system/addzone/tests.sh index 4e5301f6ae..a4c1ca425d 100755 --- a/bin/tests/system/addzone/tests.sh +++ b/bin/tests/system/addzone/tests.sh @@ -696,11 +696,17 @@ $RNDCCMD 10.53.0.3 addzone "test4.baz" '{ type master; file "e.db"; };' > /dev/n $RNDCCMD 10.53.0.3 addzone "test5.baz" '{ type master; file "e.db"; };' > /dev/null 2>&1 || ret=1 $PERL $SYSTEMTESTTOP/stop.pl addzone ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} addzone ns3 || ret=1 -$DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || ret=1 -grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 -n=`expr $n + 1` +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS @10.53.0.3 version.bind txt ch > dig.out.test$n || iret=1 + grep "status: NOERROR" dig.out.test$n > /dev/null || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/dlzexternal/driver.c b/bin/tests/system/dlzexternal/driver.c index b969b9e053..4774118a0b 100644 --- a/bin/tests/system/dlzexternal/driver.c +++ b/bin/tests/system/dlzexternal/driver.c @@ -124,13 +124,13 @@ add_name(struct dlz_example_data *state, struct record *list, strlen(data) >= sizeof(list[i].data)) return (ISC_R_NOSPACE); - strncpy(list[i].name, name, sizeof(list[i].name)); + strncpy(list[i].name, name, sizeof(list[i].name) - 1); list[i].name[sizeof(list[i].name) - 1] = '\0'; - strncpy(list[i].type, type, sizeof(list[i].type)); + strncpy(list[i].type, type, sizeof(list[i].type) - 1); list[i].type[sizeof(list[i].type) - 1] = '\0'; - strncpy(list[i].data, data, sizeof(list[i].data)); + strncpy(list[i].data, data, sizeof(list[i].data) - 1); list[i].data[sizeof(list[i].data) - 1] = '\0'; list[i].ttl = ttl; diff --git a/bin/tests/system/ecdsa/tests.sh b/bin/tests/system/ecdsa/tests.sh index c4ceefc346..7cddfd6ce5 100644 --- a/bin/tests/system/ecdsa/tests.sh +++ b/bin/tests/system/ecdsa/tests.sh @@ -20,7 +20,6 @@ rm -f dig.out.* DIGOPTS="+tcp +noau +noadd +nosea +nostat +nocmd +dnssec -p 5300" # Check the example. domain - echo "I:checking that positive validation works ($n)" ret=0 $DIG $DIGOPTS . @10.53.0.1 soa > dig.out.ns1.test$n || ret=1 diff --git a/bin/tests/system/forward/tests.sh b/bin/tests/system/forward/tests.sh index 36fd8a0040..1c3096cb79 100644 --- a/bin/tests/system/forward/tests.sh +++ b/bin/tests/system/forward/tests.sh @@ -98,10 +98,15 @@ status=`expr $status + $ret` echo_i "checking that forward only zone overrides empty zone" ret=0 -$DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 -$DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 -grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failure +for try in 0 1 2 3 4 5 6 7 8 9; do + $DIG $DIGOPTS 1.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NOERROR" dig.out.f2 > /dev/null || ret=1 + $DIG $DIGOPTS 2.0.10.in-addr.arpa TXT @10.53.0.4 > dig.out.f2 + grep "status: NXDOMAIN" dig.out.f2 > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/legacy/clean.sh b/bin/tests/system/legacy/clean.sh index ad7ef8540d..f883185746 100644 --- a/bin/tests/system/legacy/clean.sh +++ b/bin/tests/system/legacy/clean.sh @@ -14,6 +14,7 @@ rm -f ns*/named.run rm -f ns*/named.lock # build.sh +rm -f ns1/named_dump.db* rm -f ns6/K* rm -f ns6/dsset-* rm -f ns6/edns512.db diff --git a/bin/tests/system/legacy/tests.sh b/bin/tests/system/legacy/tests.sh index ed784d9615..8cbbeef53b 100755 --- a/bin/tests/system/legacy/tests.sh +++ b/bin/tests/system/legacy/tests.sh @@ -259,8 +259,13 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} legacy ns1 n=`expr $n + 1` echo_i "checking recursive lookup to edns 512 + no tcp + trust anchor fails ($n)" -ret=0 -resolution_fails edns512-notcp. || ret=1 +# retry loop in case the server restart above causes transient failure +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + resolution_fails edns512-notcp. || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/nsupdate/tests.sh b/bin/tests/system/nsupdate/tests.sh index b73d1785bb..83922eda73 100755 --- a/bin/tests/system/nsupdate/tests.sh +++ b/bin/tests/system/nsupdate/tests.sh @@ -506,7 +506,6 @@ grep "add nsec3param.test. 0 IN TYPE65534 .# 6 000140000400" jp.out.ns3.$n > /de if [ $ret != 0 ] ; then echo_i "failed"; status=`expr $ret + $status`; fi - ret=0 echo_i "testing that rndc stop updates the master file" $NSUPDATE -k ns1/ddns.key < /dev/null || ret=1 @@ -514,16 +513,24 @@ server 10.53.0.1 ${PORT} update add updated4.example.nil. 600 A 10.10.10.3 send END +sleep 3 $PERL $SYSTEMTESTTOP/stop.pl --use-rndc --port ${CONTROLPORT} nsupdate ns1 +sleep 3 # Removing the journal file and restarting the server means # that the data served by the new server process are exactly # those dumped to the master file by "rndc stop". rm -f ns1/*jnl $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nsupdate ns1 -$DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd updated4.example.nil.\ - @10.53.0.1 a > dig.out.ns1 || status=1 -digcomp knowngood.ns1.afterstop dig.out.ns1 || ret=1 -[ $ret = 0 ] || { echo_i "failed"; status=1; } +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG $DIGOPTS +tcp +noadd +nosea +nostat +noquest +nocomm +nocmd \ + updated4.example.nil. @10.53.0.1 a > dig.out.ns1 || iret=1 + digcomp knowngood.ns1.afterstop dig.out.ns1 || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -ne 0 ] && ret=1 +[ "$ret" -eq 0 ] || { echo_i "failed"; status=1; } ret=0 echo_i "check that 'nsupdate -l' with a missing keyfile reports the missing file" diff --git a/bin/tests/system/nzd2nzf/tests.sh b/bin/tests/system/nzd2nzf/tests.sh index ea013af48d..34ede6e4e7 100644 --- a/bin/tests/system/nzd2nzf/tests.sh +++ b/bin/tests/system/nzd2nzf/tests.sh @@ -61,9 +61,14 @@ $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} nzd2nzf ns1 n=`expr $n + 1` echo_i "querying for zone data from migrated zone config ($n)" -ret=0 -$DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 -grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 +# retry loop in case the server restart above causes transient failures +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 a.added.example a > dig.out.ns1.$n || ret=1 + grep 'status: NOERROR' dig.out.ns1.$n > /dev/null || ret=1 + [ "$ret" -eq 0 ] && break + sleep 1 +done n=`expr $n + 1` if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/bin/tests/system/resolver/ns7/named2.conf.in b/bin/tests/system/resolver/ns7/named2.conf.in index b966e783b7..787705984d 100644 --- a/bin/tests/system/resolver/ns7/named2.conf.in +++ b/bin/tests/system/resolver/ns7/named2.conf.in @@ -12,7 +12,7 @@ // NS7 options { - query-source address 10.53.0.7 port @PORT@ dscp 13; + query-source address 10.53.0.7 dscp 13; notify-source 10.53.0.7 dscp 14; transfer-source 10.53.0.7 dscp 15; port @PORT@; diff --git a/bin/tests/system/rpz/tests.sh b/bin/tests/system/rpz/tests.sh index 255779f785..88f74d0576 100644 --- a/bin/tests/system/rpz/tests.sh +++ b/bin/tests/system/rpz/tests.sh @@ -219,6 +219,7 @@ restart () { $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz ns$1 load_db dnsrps_loaded + sleep 1 } # $1=server and irrelevant args @@ -465,6 +466,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpz + sleep 3 fi ;; esac diff --git a/bin/tests/system/rpzrecurse/tests.sh b/bin/tests/system/rpzrecurse/tests.sh index 763cc2b5f8..11160cacdf 100644 --- a/bin/tests/system/rpzrecurse/tests.sh +++ b/bin/tests/system/rpzrecurse/tests.sh @@ -135,6 +135,7 @@ for mode in native dnsrps; do else echo_i "running DNSRPS sub-test" $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} rpzrecurse + sleep 3 fi ;; esac diff --git a/bin/tests/system/statistics/tests.sh b/bin/tests/system/statistics/tests.sh index ccbca44996..ce82d0d2a8 100644 --- a/bin/tests/system/statistics/tests.sh +++ b/bin/tests/system/statistics/tests.sh @@ -71,7 +71,7 @@ $RNDCCMD -s 10.53.0.3 stats > /dev/null 2>&1 [ -f ns3/named.stats ] || ret=1 if [ ! "$CYGWIN" ]; then nsock0nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ 0 -ne ${nsock0nstat:-0} ] || ret=1 + [ 0 -eq ${nsock0nstat:-0} ] || ret=1 fi if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` @@ -107,7 +107,7 @@ if [ ! "$CYGWIN" ]; then ret=0 echo_i "verifying active sockets output in named.stats ($n)" nsock1nstat=`grep "UDP/IPv4 sockets active" ns3/named.stats | awk '{print $1}'` - [ `expr $nsock1nstat - $nsock0nstat` -eq 1 ] || ret=1 + [ `expr ${nsock1nstat:-0} - ${nsock0nstat:-0}` -eq 1 ] || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` n=`expr $n + 1` diff --git a/bin/tests/system/tcp/ans6/ans.py b/bin/tests/system/tcp/ans6/ans.py index 3debf19e20..331ac7fbd1 100644 --- a/bin/tests/system/tcp/ans6/ans.py +++ b/bin/tests/system/tcp/ans6/ans.py @@ -42,7 +42,7 @@ import time # Timeout for establishing all connections requested by a single 'open' command. OPEN_TIMEOUT = 2 - +VERSION_QUERY = b'\x00\x1e\xaf\xb8\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x07version\x04bind\x00\x00\x10\x00\x03' def log(msg): print(datetime.datetime.now().strftime('%d-%b-%Y %H:%M:%S.%f ') + msg) @@ -84,6 +84,7 @@ def open_connections(active_conns, count, host, port): log('%s for socket %s' % (errno.errorcode[err], sock)) errors.append(sock) else: + sock.send(VERSION_QUERY) active_conns.append(sock) if errors: diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index 3af9432031..faf2e1ba78 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -163,8 +163,12 @@ check_stats_limit() { assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1 } retry 2 check_stats_limit || ret=1 +close_connections $((TCP_LIMIT + 1)) if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) +# wait for connections to close +sleep 5 + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/unknown/tests.sh b/bin/tests/system/unknown/tests.sh index 190b84020d..eeb8920ffa 100644 --- a/bin/tests/system/unknown/tests.sh +++ b/bin/tests/system/unknown/tests.sh @@ -122,16 +122,24 @@ do done echo_i "checking large unknown record loading on master" -ret=0 -$DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.1 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -139,10 +147,16 @@ echo_i "stop and restart slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns2 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns2 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on slave" -ret=0 -$DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.2 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF -s large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` @@ -157,10 +171,16 @@ echo_i "stop and restart inline slave" $PERL $SYSTEMTESTTOP/stop.pl unknown ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} unknown ns3 +# server may be answering queries before zones are loaded, +# so retry a few times if this query fails echo_i "checking large unknown record loading on inline slave" -ret=0 -$DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } -$DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } +for try in 0 1 2 3 4 5 6 7 8 9; do + ret=0 + $DIG $DIGOPTS @10.53.0.3 +tcp +short large.example TYPE45234 > dig.out || { ret=1 ; echo_i "dig failed" ; } + $DIFF large.out dig.out > /dev/null || { ret=1 ; echo_i "$DIFF failed"; } + [ "$ret" -eq 0 ] && break + sleep 1 +done [ $ret = 0 ] || echo_i "failed" status=`expr $status + $ret` diff --git a/bin/tests/system/upforwd/ns3/named.conf.in b/bin/tests/system/upforwd/ns3/named.conf.in index e440a1f0d2..d037e745e8 100644 --- a/bin/tests/system/upforwd/ns3/named.conf.in +++ b/bin/tests/system/upforwd/ns3/named.conf.in @@ -17,7 +17,7 @@ options { pid-file "named.pid"; listen-on { 10.53.0.3; }; listen-on-v6 { none; }; - recursion yes; + recursion no; notify yes; }; diff --git a/bin/tests/system/upforwd/tests.sh b/bin/tests/system/upforwd/tests.sh index b0694bbd5c..3b0d7b3998 100644 --- a/bin/tests/system/upforwd/tests.sh +++ b/bin/tests/system/upforwd/tests.sh @@ -21,8 +21,6 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}" status=0 n=1 -sleep 5 - echo_i "waiting for servers to be ready for testing ($n)" for i in 1 2 3 4 5 6 7 8 9 10 do diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index 11a27cb364..13fc762c4a 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -431,11 +431,17 @@ $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.1.$n grep "status: NOERROR," dig.out.1.$n > /dev/null || tmp=1 $PERL $SYSTEMTESTTOP/stop.pl xfer ns3 $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} xfer ns3 -$DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n -grep "status: NOERROR," dig.out.2.$n > /dev/null || tmp=1 -$DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n -digcomp knowngood.mapped dig.out.3.$n || tmp=1 -if test $tmp != 0 ; then echo_i "failed"; fi +for try in 0 1 2 3 4 5 6 7 8 9; do + iret=0 + $DIG -p ${PORT} txt mapped @10.53.0.3 > dig.out.2.$n + grep "status: NOERROR," dig.out.2.$n > /dev/null || iret=1 + $DIG -p ${PORT} axfr mapped @10.53.0.3 > dig.out.3.$n + digcomp knowngood.mapped dig.out.3.$n || iret=1 + [ "$iret" -eq 0 ] && break + sleep 1 +done +[ "$iret" -eq 0 ] || tmp=1 +[ "$tmp" -ne 0 ] && echo_i "failed" status=`expr $status + $tmp` n=`expr $n + 1`