2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 05:57:52 +00:00

Add a hung fetch check while chasing DS in the forward system test

Implement TCP support in the `ans11` Python-based DNS server.

Implement a control command channel in `ans11` to support an optional
silent mode of operation, which, when enabled, will ignore incoming
queries.

In the added check, make `ans11` the NS server of
"a.root-servers.nil." for `ns3`, so that `ns3` uses `ans11` (in silent
mode) for regular (non-forwarded) name resolution.

This will trigger the "hung fetch" scenario, which was causing `named`
to crash.
This commit is contained in:
Aram Sargsyan 2022-02-11 15:10:39 +00:00 committed by Michal Nowak
parent 84914a0610
commit 848094d6f7
No known key found for this signature in database
GPG Key ID: 24A3E8463AEE5E56
4 changed files with 194 additions and 32 deletions

View File

@ -15,6 +15,7 @@ import sys
import signal
import socket
import select
import struct
from datetime import datetime, timedelta
import time
import functools
@ -30,6 +31,66 @@ def logquery(type, qname):
with open("qlog", "a") as f:
f.write("%s %s\n", type, qname)
def udp_listen(ip, port, is_ipv6=False):
    """Create a UDP listener bound to (ip, port).

    Returns the bound socket on success.  When the socket can not be
    created or bound, an IPv6 listener is treated as optional and None is
    returned, while a failed IPv4 listener raises socket.error.
    """
    family = socket.AF_INET6 if is_ipv6 else socket.AF_INET
    udp = None
    try:
        udp = socket.socket(family, socket.SOCK_DGRAM)
        udp.bind((ip, port))
    except (OSError, OverflowError):
        # OSError covers socket creation/bind failures; OverflowError is
        # what bind() raises for a port outside 0-65535.  Anything else
        # (KeyboardInterrupt, SystemExit, ...) is deliberately not caught.
        if udp is not None:
            udp.close()
        udp = None

    if udp is None and not is_ipv6:
        raise socket.error("Can not create an IPv4 UDP listener")

    return udp
def tcp_listen(ip, port, is_ipv6=False):
    """Create a TCP listener bound to (ip, port) and start listening.

    SO_REUSEADDR is set before binding and the listen backlog is 100.
    Returns the listening socket on success.  When the socket can not be
    created, configured or bound, an IPv6 listener is treated as optional
    and None is returned, while a failed IPv4 listener raises socket.error.
    """
    family = socket.AF_INET6 if is_ipv6 else socket.AF_INET
    tcp = None
    try:
        tcp = socket.socket(family, socket.SOCK_STREAM)
        tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        tcp.bind((ip, port))
        tcp.listen(100)
    except (OSError, OverflowError):
        # OSError covers socket creation/bind/listen failures;
        # OverflowError is what bind() raises for a port outside 0-65535.
        # Anything else (KeyboardInterrupt, SystemExit, ...) is
        # deliberately not caught.
        if tcp is not None:
            tcp.close()
        tcp = None

    if tcp is None and not is_ipv6:
        raise socket.error("Can not create an IPv4 TCP listener")

    return tcp
############################################################################
# Control channel - send "1" or "0" to enable or disable the "silent" mode.
############################################################################
# When True, incoming queries are received but never answered.
silent = False


def ctrl_channel(msg):
    """Process a control channel message (bytes).

    Only the first line of the message is considered.  A single b'1'
    enables the silent mode and a single b'0' disables it; anything else,
    including an empty message, leaves the mode unchanged.
    """
    global silent

    lines = msg.splitlines()
    if not lines:
        # Empty message: there is no first line to interpret.
        return

    cmd = lines[0]
    print("Received control message: %s" % cmd)

    if len(cmd) != 1:
        return

    if silent and cmd == b'0':
        silent = False
        print("Silent mode was disabled")
    elif not silent and cmd == b'1':
        silent = True
        print("Silent mode was enabled")
############################################################################
# Respond to a DNS query.
############################################################################
@ -79,18 +140,17 @@ ip6 = "fd92:7065:b8e:ffff::11"
try: port=int(os.environ['PORT'])
except: port=5300
query4_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
query4_socket.bind((ip4, port))
havev6 = True
try:
query6_socket = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
try:
query6_socket.bind((ip6, port))
except:
query6_socket.close()
havev6 = False
except:
havev6 = False
try: ctrlport=int(os.environ['EXTRAPORT1'])
except: ctrlport=5300
ctrl4_tcp = tcp_listen(ip4, ctrlport)
query4_udp = udp_listen(ip4, port)
query6_udp = udp_listen(ip6, port, is_ipv6=True)
query4_tcp = tcp_listen(ip4, port)
query6_tcp = tcp_listen(ip6, port, is_ipv6=True)
havev6 = query6_udp is not None and query6_tcp is not None
signal.signal(signal.SIGTERM, sigterm)
f = open('ans.pid', 'w')
@ -100,15 +160,19 @@ f.close()
running = True
print ("Listening on %s port %d" % (ip4, ctrlport))
print ("Listening on %s port %d" % (ip4, port))
if havev6:
print ("Listening on %s port %d" % (ip6, port))
print ("Ctrl-c to quit")
if havev6:
input = [query4_socket, query6_socket]
input = [ctrl4_tcp, query4_udp, query6_udp, query4_tcp, query6_tcp]
else:
input = [query4_socket]
input = [ctrl4_tcp, query4_udp, query4_tcp]
hung_conns = []
while running:
try:
@ -121,16 +185,71 @@ while running:
break
for s in inputready:
if s == query4_socket or s == query6_socket:
print ("Query received on %s" %
(ip4 if s == query4_socket else ip6), end=" ")
if s == ctrl4_tcp:
    # Control channel: accept one connection, read a single command byte
    # and hand it to ctrl_channel().
    print("Control channel connected")
    conn = None
    try:
        conn, addr = s.accept()
        msg = conn.recv(1)
        if msg:
            ctrl_channel(msg)
    except socket.error as e:
        # socket.timeout is a subclass of socket.error, so timeouts are
        # covered here as well.
        print("Control channel error: %s" % str(e))
    finally:
        # Close the connection exactly once, whether or not an error
        # occurred.
        if conn:
            conn.close()
elif s == query4_tcp or s == query6_tcp:
    print("TCP query received on %s" %
          (ip4 if s == query4_tcp else ip6), end=" ")
    conn = None
    try:
        # Handle incoming queries
        conn, addr = s.accept()
        if silent:
            # Do not respond and hang the connection: keep it referenced
            # in hung_conns so that it stays open, and clear the local
            # name so that the cleanup below does not close it.
            print("NO RESPONSE (silent mode)")
            hung_conns.append(conn)
            conn = None
        else:
            # A DNS message over TCP is prefixed with its length as a
            # two-byte big-endian integer.
            prefix = conn.recv(2)
            if len(prefix) != 2:
                print("NO RESPONSE (can not read the message length)")
            else:
                length = struct.unpack('>H', prefix)[0]
                msg = conn.recv(length)
                if len(msg) != length:
                    print("NO RESPONSE (can not read the message)")
                else:
                    rsp = create_response(msg)
                    if rsp:
                        print(dns.rcode.to_text(rsp.rcode()))
                        wire = rsp.to_wire()
                        # sendall() keeps writing until the whole buffer
                        # is sent; send() may stop after a partial write.
                        conn.sendall(struct.pack('>H', len(wire)) + wire)
                    else:
                        print("NO RESPONSE (can not create a response)")
    except socket.error as e:
        print("NO RESPONSE (error: %s)" % str(e))
    finally:
        if conn:
            conn.close()
elif s == query4_udp or s == query6_udp:
print("UDP query received on %s" %
(ip4 if s == query4_udp else ip6), end=" ")
# Handle incoming queries
msg = s.recvfrom(65535)
rsp = create_response(msg[0])
if rsp:
print(dns.rcode.to_text(rsp.rcode()))
s.sendto(rsp.to_wire(), msg[1])
if not silent:
rsp = create_response(msg[0])
if rsp:
print(dns.rcode.to_text(rsp.rcode()))
s.sendto(rsp.to_wire(), msg[1])
else:
print("NO RESPONSE (can not create a response)")
else:
print("NO RESPONSE")
# Do not respond.
print("NO RESPONSE (silent mode)")
if not running:
break

View File

@ -15,7 +15,7 @@
rm -f ./dig.out.*
rm -f ./*/named.conf
rm -f ./*/named.memstats
rm -f ./*/named.run ./*/named.run.prev
rm -f ./*/named.run ./*/named.run.prev ./*/ans.run
rm -f ./*/named_dump.db
rm -f ./ns*/named.lock
rm -f ./ns*/managed-keys.bind*

View File

@ -0,0 +1,21 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; SPDX-License-Identifier: MPL-2.0
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, you can obtain one at https://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
$TTL 300
. IN SOA gson.nominum.com. a.root.servers.nil. (
2000042100 ; serial
600 ; refresh
600 ; retry
1200 ; expire
600 ; minimum
)
. NS a.root-servers.nil.
a.root-servers.nil. A 10.53.0.11

View File

@ -17,7 +17,7 @@ dig_with_opts() (
)
sendcmd() (
send 10.53.0.6 "$EXTRAPORT1"
send "$1" "$EXTRAPORT1"
)
rndccmd() {
@ -185,7 +185,7 @@ n=$((n+1))
echo_i "checking that a forwarder timeout prevents it from being reused in the same fetch context ($n)"
ret=0
# Make ans6 receive queries without responding to them.
echo "//" | sendcmd
echo "//" | sendcmd 10.53.0.6
# Query for a record in a zone which is forwarded to a non-responding forwarder
# and is delegated from the root to check whether the forwarder will be retried
# when a delegation is encountered after falling back to full recursive
@ -235,25 +235,47 @@ grep "status: SERVFAIL" dig.out.$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
n=$((n+1))
echo_i "checking switch from forwarding to normal resolution while chasing DS ($n)"
ret=0
copy_setports ns3/named2.conf.in ns3/named.conf
rndccmd 10.53.0.3 reconfig 2>&1 | sed 's/^/ns3 /' | cat_i
sleep 1
sendcmd << EOF
# Prepare ans6 for the chasing DS tests.
sendcmd 10.53.0.6 << EOF
/ns1.sld.tld/A/
300 A 10.53.0.2
/sld.tld/NS/
300 NS ns1.sld.tld.
/sld.tld/
EOF
n=$((n+1))
echo_i "checking switch from forwarding to normal resolution while chasing DS ($n)"
ret=0
copy_setports ns3/named2.conf.in ns3/named.conf
rndccmd 10.53.0.3 reconfig 2>&1 | sed 's/^/ns3 /' | cat_i
sleep 1
nextpart ns3/named.run >/dev/null
dig_with_opts @$f1 xxx.yyy.sld.tld ds > dig.out.$n.f1 || ret=1
grep "status: SERVFAIL" dig.out.$n.f1 > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
# See [GL #3129].
# Enable silent mode for ans11.
echo "1" | sendcmd 10.53.0.11
n=$((n+1))
echo_i "checking the handling of hung DS fetch while chasing DS ($n)"
ret=0
copy_setports ns3/named2.conf.in ns3/tmp
sed 's/root.db/root2.db/' ns3/tmp > ns3/named.conf
rm -f ns3/tmp
rndccmd 10.53.0.3 reconfig 2>&1 | sed 's/^/ns3 /' | cat_i
rndccmd 10.53.0.3 flush 2>&1 | sed 's/^/ns3 /' | cat_i
sleep 1
nextpart ns3/named.run >/dev/null
dig_with_opts @$f1 xxx.yyy.sld.tld ds > dig.out.$n.f1 || ret=1
grep "status: SERVFAIL" dig.out.$n.f1 > /dev/null || ret=1
# Disable silent mode for ans11.
echo "0" | sendcmd 10.53.0.11
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
#
# Check various spoofed response scenarios. The same tests will be
# run twice, with "forward first" and "forward only" configurations.