mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-09-01 06:55:30 +00:00
Merge branch '3205-dig-tcp-next-server-on-connection-error-crash' into 'main'
Fix dig error when trying the next server after a TCP connection failure Closes #3205 See merge request isc-projects/bind9!5976
This commit is contained in:
5
CHANGES
5
CHANGES
@@ -1,3 +1,8 @@
|
|||||||
|
5833. [bug] When encountering socket error while trying to initiate
|
||||||
|
a TCP connection to a server, dig could hang
|
||||||
|
indefinitely, when there were more servers to try.
|
||||||
|
[GL #3205]
|
||||||
|
|
||||||
5832. [bug] When timing-out or having other types of socket errors
|
5832. [bug] When timing-out or having other types of socket errors
|
||||||
during a query, dig wasn't trying to perform the lookup
|
during a query, dig wasn't trying to perform the lookup
|
||||||
using other servers, in case they exist. [GL #3128]
|
using other servers, in case they exist. [GL #3128]
|
||||||
|
@@ -3293,8 +3293,10 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
|
|||||||
next = NULL;
|
next = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
cancel_lookup(l);
|
|
||||||
query_detach(&query);
|
query_detach(&query);
|
||||||
|
if (next == NULL) {
|
||||||
|
cancel_lookup(l);
|
||||||
|
}
|
||||||
lookup_detach(&l);
|
lookup_detach(&l);
|
||||||
|
|
||||||
if (next != NULL) {
|
if (next != NULL) {
|
||||||
|
@@ -20,7 +20,27 @@ import struct
|
|||||||
import dns, dns.message
|
import dns, dns.message
|
||||||
from dns.rcode import *
|
from dns.rcode import *
|
||||||
|
|
||||||
def create_response(msg):
|
modes = [
|
||||||
|
b"silent", # Do not respond
|
||||||
|
b"close", # UDP: same as silent; TCP: also close the connection
|
||||||
|
b"servfail", # Always respond with SERVFAIL
|
||||||
|
b"unstable", # Constantly switch between "silent" and "servfail"
|
||||||
|
]
|
||||||
|
mode = modes[0]
|
||||||
|
n = 0
|
||||||
|
|
||||||
|
def ctrl_channel(msg):
|
||||||
|
global modes, mode, n
|
||||||
|
|
||||||
|
msg = msg.splitlines().pop(0)
|
||||||
|
print("Received control message: %s" % msg)
|
||||||
|
|
||||||
|
if msg in modes:
|
||||||
|
mode = msg
|
||||||
|
n = 0
|
||||||
|
print("New mode: %s" % str(mode))
|
||||||
|
|
||||||
|
def create_servfail(msg):
|
||||||
m = dns.message.from_wire(msg)
|
m = dns.message.from_wire(msg)
|
||||||
qname = m.question[0].name.to_text()
|
qname = m.question[0].name.to_text()
|
||||||
rrtype = m.question[0].rdtype
|
rrtype = m.question[0].rdtype
|
||||||
@@ -45,6 +65,9 @@ ip4 = "10.53.0.8"
|
|||||||
try: port=int(os.environ["PORT"])
|
try: port=int(os.environ["PORT"])
|
||||||
except: port=5300
|
except: port=5300
|
||||||
|
|
||||||
|
try: ctrlport=int(os.environ['EXTRAPORT1'])
|
||||||
|
except: ctrlport=5300
|
||||||
|
|
||||||
query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||||
query4_udp.bind((ip4, port))
|
query4_udp.bind((ip4, port))
|
||||||
|
|
||||||
@@ -53,6 +76,11 @@ query4_tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|||||||
query4_tcp.bind((ip4, port))
|
query4_tcp.bind((ip4, port))
|
||||||
query4_tcp.listen(100)
|
query4_tcp.listen(100)
|
||||||
|
|
||||||
|
ctrl4_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
ctrl4_tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
|
ctrl4_tcp.bind((ip4, ctrlport))
|
||||||
|
ctrl4_tcp.listen(100)
|
||||||
|
|
||||||
signal.signal(signal.SIGTERM, sigterm)
|
signal.signal(signal.SIGTERM, sigterm)
|
||||||
|
|
||||||
f = open("ans.pid", "w")
|
f = open("ans.pid", "w")
|
||||||
@@ -63,12 +91,11 @@ f.close()
|
|||||||
running = True
|
running = True
|
||||||
|
|
||||||
print ("Listening on %s port %d" % (ip4, port))
|
print ("Listening on %s port %d" % (ip4, port))
|
||||||
|
print ("Listening on %s port %d" % (ip4, ctrlport))
|
||||||
print ("Ctrl-c to quit")
|
print ("Ctrl-c to quit")
|
||||||
|
|
||||||
input = [query4_udp, query4_tcp]
|
input = [query4_udp, query4_tcp, ctrl4_tcp]
|
||||||
|
|
||||||
n_udp = 0
|
|
||||||
n_tcp = 0
|
|
||||||
hung_conns = []
|
hung_conns = []
|
||||||
|
|
||||||
while running:
|
while running:
|
||||||
@@ -83,54 +110,79 @@ while running:
|
|||||||
|
|
||||||
for s in inputready:
|
for s in inputready:
|
||||||
if s == query4_udp:
|
if s == query4_udp:
|
||||||
|
n = n + 1
|
||||||
print("UDP query received on %s" % ip4, end=" ")
|
print("UDP query received on %s" % ip4, end=" ")
|
||||||
n_udp = n_udp + 1
|
|
||||||
msg = s.recvfrom(65535)
|
msg = s.recvfrom(65535)
|
||||||
# Do not response to every other query.
|
if mode == b"silent" or mode == b"close" or (mode == b"unstable" and n % 2 == 1):
|
||||||
if n_udp % 2 == 1:
|
# Do not respond.
|
||||||
print("NO RESPONSE")
|
print("NO RESPONSE (%s)" % str(mode))
|
||||||
continue
|
continue
|
||||||
rsp = create_response(msg[0])
|
elif mode == b"servfail" or (mode == b"unstable" and n % 2 == 0):
|
||||||
if rsp:
|
rsp = create_servfail(msg[0])
|
||||||
print(dns.rcode.to_text(rsp.rcode()))
|
if rsp:
|
||||||
s.sendto(rsp.to_wire(), msg[1])
|
print(dns.rcode.to_text(rsp.rcode()))
|
||||||
|
s.sendto(rsp.to_wire(), msg[1])
|
||||||
|
else:
|
||||||
|
print("NO RESPONSE (can not create a response)")
|
||||||
else:
|
else:
|
||||||
print("NO RESPONSE")
|
raise(Exception("unsupported mode: %s" % mode))
|
||||||
elif s == query4_tcp:
|
elif s == query4_tcp:
|
||||||
|
n = n + 1
|
||||||
print("TCP query received on %s" % ip4, end=" ")
|
print("TCP query received on %s" % ip4, end=" ")
|
||||||
n_tcp = n_tcp + 1
|
|
||||||
conn = None
|
conn = None
|
||||||
try:
|
try:
|
||||||
conn, addr = s.accept()
|
if mode == b"silent" or (mode == b"unstable" and n % 2 == 1):
|
||||||
# Do not response to every other query, hang the connection.
|
conn, addr = s.accept()
|
||||||
if n_tcp % 2 == 1:
|
# Do not respond and hang the connection.
|
||||||
print("NO RESPONSE")
|
print("NO RESPONSE (%s)" % str(mode))
|
||||||
hung_conns.append(conn)
|
hung_conns.append(conn)
|
||||||
conn = None
|
|
||||||
continue
|
continue
|
||||||
else:
|
elif mode == b"close":
|
||||||
|
conn, addr = s.accept()
|
||||||
|
# Do not respond and close the connection.
|
||||||
|
print("NO RESPONSE (%s)" % str(mode))
|
||||||
|
conn.close()
|
||||||
|
continue
|
||||||
|
elif mode == b"servfail" or (mode == b"unstable" and n % 2 == 0):
|
||||||
|
conn, addr = s.accept()
|
||||||
# get TCP message length
|
# get TCP message length
|
||||||
msg = conn.recv(2)
|
msg = conn.recv(2)
|
||||||
if len(msg) != 2:
|
if len(msg) != 2:
|
||||||
print("NO RESPONSE")
|
print("NO RESPONSE (can not read the message length)")
|
||||||
conn.close()
|
conn.close()
|
||||||
continue
|
continue
|
||||||
length = struct.unpack('>H', msg[:2])[0]
|
length = struct.unpack('>H', msg[:2])[0]
|
||||||
msg = conn.recv(length)
|
msg = conn.recv(length)
|
||||||
if len(msg) != length:
|
if len(msg) != length:
|
||||||
print("NO RESPONSE")
|
print("NO RESPONSE (can not read the message)")
|
||||||
conn.close()
|
conn.close()
|
||||||
continue
|
continue
|
||||||
rsp = create_response(msg)
|
rsp = create_servfail(msg)
|
||||||
if rsp:
|
if rsp:
|
||||||
print(dns.rcode.to_text(rsp.rcode()))
|
print(dns.rcode.to_text(rsp.rcode()))
|
||||||
wire = rsp.to_wire()
|
wire = rsp.to_wire()
|
||||||
conn.send(struct.pack('>H', len(wire)))
|
conn.send(struct.pack('>H', len(wire)))
|
||||||
conn.send(wire)
|
conn.send(wire)
|
||||||
else:
|
else:
|
||||||
print("NO RESPONSE")
|
print("NO RESPONSE (can not create a response)")
|
||||||
except:
|
else:
|
||||||
print("NO RESPONSE")
|
raise(Exception("unsupported mode: %s" % mode))
|
||||||
|
except socket.error as e:
|
||||||
|
print("NO RESPONSE (error: %s)" % str(e))
|
||||||
|
if conn:
|
||||||
|
conn.close()
|
||||||
|
elif s == ctrl4_tcp:
|
||||||
|
print("Control channel connected")
|
||||||
|
conn = None
|
||||||
|
try:
|
||||||
|
# Handle control channel input
|
||||||
|
conn, addr = s.accept()
|
||||||
|
msg = conn.recv(1024)
|
||||||
|
if msg:
|
||||||
|
ctrl_channel(msg)
|
||||||
|
conn.close()
|
||||||
|
except s.timeout:
|
||||||
|
pass
|
||||||
if conn:
|
if conn:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
@@ -1001,6 +1001,8 @@ if [ -x "$DIG" ] ; then
|
|||||||
# See [GL #3020] for more information
|
# See [GL #3020] for more information
|
||||||
n=$((n+1))
|
n=$((n+1))
|
||||||
echo_i "check that dig handles UDP timeout followed by a SERVFAIL correctly ($n)"
|
echo_i "check that dig handles UDP timeout followed by a SERVFAIL correctly ($n)"
|
||||||
|
# Ask ans8 to be in "unstable" mode (switching between "silent" and "servfail" modes)
|
||||||
|
echo "unstable" | sendcmd 10.53.0.8
|
||||||
ret=0
|
ret=0
|
||||||
dig_with_opts +timeout=1 +nofail @10.53.0.8 a.example > dig.out.test$n 2>&1 || ret=1
|
dig_with_opts +timeout=1 +nofail @10.53.0.8 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
|
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
|
||||||
@@ -1009,6 +1011,8 @@ if [ -x "$DIG" ] ; then
|
|||||||
|
|
||||||
n=$((n+1))
|
n=$((n+1))
|
||||||
echo_i "check that dig handles TCP timeout followed by a SERVFAIL correctly ($n)"
|
echo_i "check that dig handles TCP timeout followed by a SERVFAIL correctly ($n)"
|
||||||
|
# Ask ans8 to be in "unstable" mode (switching between "silent" and "servfail" modes)
|
||||||
|
echo "unstable" | sendcmd 10.53.0.8
|
||||||
ret=0
|
ret=0
|
||||||
dig_with_opts +timeout=1 +nofail +tcp @10.53.0.8 a.example > dig.out.test$n 2>&1 || ret=1
|
dig_with_opts +timeout=1 +nofail +tcp @10.53.0.8 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
|
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
|
||||||
@@ -1016,19 +1020,54 @@ if [ -x "$DIG" ] ; then
|
|||||||
status=$((status+ret))
|
status=$((status+ret))
|
||||||
|
|
||||||
n=$((n+1))
|
n=$((n+1))
|
||||||
echo_i "check that dig tries the next server after a connection error ($n)"
|
echo_i "check that dig tries the next server after a UDP socket read error ($n)"
|
||||||
ret=0
|
ret=0
|
||||||
dig_with_opts -d @10.53.0.99 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
dig_with_opts @10.53.0.99 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||||
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
||||||
status=$((status+ret))
|
status=$((status+ret))
|
||||||
|
|
||||||
n=$((n+1))
|
n=$((n+1))
|
||||||
echo_i "check that dig tries the next server after timeouts ($n)"
|
echo_i "check that dig tries the next server after a TCP socket read error ($n)"
|
||||||
# Ask ans4 to not respond to queries
|
# Ask ans8 to be in "close" mode, which closes the connection after accepting it
|
||||||
echo "//" | sendcmd 10.53.0.4
|
echo "close" | sendcmd 10.53.0.8
|
||||||
ret=0
|
ret=0
|
||||||
dig_with_opts -d @10.53.0.4 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
dig_with_opts +tcp @10.53.0.8 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
|
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||||
|
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
||||||
|
status=$((status+ret))
|
||||||
|
|
||||||
|
# Note that we combine TCP socket "connection error" and "timeout" cases in
|
||||||
|
# one, because it is not trivial to simulate the timeout case in a system test
|
||||||
|
# in Linux without a firewall, but the code which handles error cases during
|
||||||
|
# the connection establishment time does not differentiate between timeout and
|
||||||
|
# other types of errors (unlike during reading), so this one check should be
|
||||||
|
# sufficient for both cases.
|
||||||
|
n=$((n+1))
|
||||||
|
echo_i "check that dig tries the next server after a TCP socket connection error/timeout ($n)"
|
||||||
|
ret=0
|
||||||
|
dig_with_opts +tcp @10.53.0.99 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
|
test $(grep "connection refused\|timed out" dig.out.test$n | wc -l) -eq 3 || ret=1
|
||||||
|
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||||
|
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
||||||
|
status=$((status+ret))
|
||||||
|
|
||||||
|
n=$((n+1))
|
||||||
|
echo_i "check that dig tries the next server after UDP socket read timeouts ($n)"
|
||||||
|
# Ask ans8 to be in "silent" mode
|
||||||
|
echo "silent" | sendcmd 10.53.0.8
|
||||||
|
ret=0
|
||||||
|
dig_with_opts +timeout=1 @10.53.0.8 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
|
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||||
|
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
||||||
|
status=$((status+ret))
|
||||||
|
|
||||||
|
n=$((n+1))
|
||||||
|
echo_i "check that dig tries the next server after TCP socket read timeouts ($n)"
|
||||||
|
# Ask ans8 to be in "silent" mode
|
||||||
|
echo "silent" | sendcmd 10.53.0.8
|
||||||
|
ret=0
|
||||||
|
dig_with_opts +timeout=1 +tcp @10.53.0.8 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1
|
||||||
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||||
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
if [ $ret -ne 0 ]; then echo_i "failed"; fi
|
||||||
status=$((status+ret))
|
status=$((status+ret))
|
||||||
|
Reference in New Issue
Block a user