2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 05:57:52 +00:00

Merge branch '3026-premature-tcp-connection-closure-leaks-fetch-contexts-hang-on-shutdown' into 'main'

Shutdown all active TCP connections on error

Closes #3026

See merge request isc-projects/bind9!5599
This commit is contained in:
Evan Hunt 2021-12-01 20:10:44 +00:00
commit 4ecbc3d09f
14 changed files with 482 additions and 104 deletions

View File

@ -1,3 +1,7 @@
5772. [bug] The resolver could hang on shutdown due to dispatch
resources not being cleaned up when a TCP connection
was reset. [GL #3026]
5771. [bug] Use idn2 UseSTD3ASCIIRules=false to disable additional
unicode validity checks because enabling the additional
checks would break valid domain names that contains

View File

@ -213,7 +213,7 @@ if HAVE_PYTHON
TESTS += kasp keymgr2kasp tcp pipelined
if HAVE_PYMOD_DNS
TESTS += checkds qmin cookie timeouts
TESTS += checkds dispatch qmin cookie timeouts
if HAVE_PERLMOD_NET_DNS
TESTS += dnssec

View File

@ -0,0 +1,99 @@
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################
import os
import select
import signal
import socket
import sys
import time
import dns.flags
import dns.message
def port():
env_port = os.getenv("PORT")
if env_port is None:
env_port = 5300
else:
env_port = int(env_port)
return env_port
def udp_listen(port):
udp = socket.socket(type=socket.SOCK_DGRAM)
udp.bind(('10.53.0.3', port))
return udp
def tcp_listen(port):
tcp = socket.socket(type=socket.SOCK_STREAM)
tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
tcp.bind(('10.53.0.3', port))
tcp.listen(100)
return tcp
def udp_tc_once(udp):
qrybytes, clientaddr = udp.recvfrom(65535)
qry = dns.message.from_wire(qrybytes)
answ = dns.message.make_response(qry)
answ.flags |= dns.flags.TC
answbytes = answ.to_wire()
udp.sendto(answbytes, clientaddr)
def tcp_once(tcp):
csock, _clientaddr = tcp.accept()
time.sleep(5)
csock.close()
def sigterm(signum, frame):
os.remove('ans.pid')
sys.exit(0)
def write_pid():
with open('ans.pid', 'w') as f:
pid = os.getpid()
f.write("{}".format(pid))
signal.signal(signal.SIGTERM, sigterm)
write_pid()
udp = udp_listen(port())
tcp = tcp_listen(port())
input = [udp, tcp]
while True:
try:
inputready, outputready, exceptready = select.select(input, [], [])
except select.error:
break
except socket.error:
break
except KeyboardInterrupt:
break
for s in inputready:
if s == udp:
udp_tc_once(udp)
if s == tcp:
tcp_once(tcp)
sigterm(signal.SIGTERM, 0)

View File

@ -0,0 +1,12 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
rm -f ns*/named.run ns*/named.conf ns*/named.pid ns*/managed-keys.bind*
rm -f ans*/ans.run ans*/ans.pid
rm -f ns*/named.memstats

View File

@ -0,0 +1,25 @@
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################
import os
import pytest
@pytest.fixture
def port(request):
# pylint: disable=unused-argument
env_port = os.getenv("PORT")
if env_port is None:
env_port = 5300
else:
env_port = int(env_port)
return env_port

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
key rndc_key {
secret "1234abcd8765";
algorithm hmac-sha256;
};
controls {
inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
port @PORT@;
pid-file "named.pid";
listen-on { 10.53.0.1; };
query-source address 10.53.0.1;
notify-source 10.53.0.1;
transfer-source 10.53.0.1;
listen-on-v6 { fd92:7065:b8e:ffff::1; };
query-source-v6 address fd92:7065:b8e:ffff::1;
notify-source-v6 fd92:7065:b8e:ffff::1;
transfer-source-v6 fd92:7065:b8e:ffff::1;
recursion no;
servfail-ttl 0;
};
zone "." {
type primary;
file "root.db";
};

View File

@ -0,0 +1,14 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
. 300 SOA . . 0 0 0 0 0
. 300 NS ns.nil.
ns.nil. 300 A 10.53.0.1
example. 300 NS ns.example.
ns.example. 300 A 10.53.0.2

View File

@ -0,0 +1,6 @@
example. 86400 IN SOA ns.example. root.example. 43 10800 900 604800 86400
example. 86400 IN NS ns.example.
ns.example. A 10.53.0.2
ns.sub.example. A 10.53.0.3
sub.example. NS ns.sub.example.

View File

@ -0,0 +1,47 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
key rndc_key {
secret "1234abcd8765";
algorithm hmac-sha256;
};
controls {
inet 10.53.0.2 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
port @PORT@;
pid-file "named.pid";
listen-on { 10.53.0.2; };
query-source address 10.53.0.2;
notify-source 10.53.0.2;
transfer-source 10.53.0.2;
listen-on-v6 { fd92:7065:b8e:ffff::2; };
query-source-v6 address fd92:7065:b8e:ffff::2;
notify-source-v6 fd92:7065:b8e:ffff::2;
transfer-source-v6 fd92:7065:b8e:ffff::2;
recursion yes;
servfail-ttl 0;
};
zone "." {
type hint;
file "../../common/root.hint";
};
zone "example" {
type primary;
file "example.db";
};

View File

@ -0,0 +1,15 @@
#!/bin/sh
#
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
. ../conf.sh
copy_setports ns1/named.conf.in ns1/named.conf
copy_setports ns2/named.conf.in ns2/named.conf

View File

@ -0,0 +1,25 @@
#!/usr/bin/python3
############################################################################
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
############################################################################
import pytest
pytest.importorskip("dns")
import dns.message
import dns.query
import dns.rcode
def test_connreset(port):
msg = dns.message.make_query("sub.example.", "A", want_dnssec=True,
use_edns=0, payload=1232)
ans = dns.query.udp(msg, "10.53.0.2", timeout=10, port=port)
assert ans.rcode() == dns.rcode.SERVFAIL

View File

@ -56,3 +56,6 @@ Bug Fixes
``rndc reconfig``, then bringing back the removed ``catalog-zone`` clause and
running ``rndc reconfig`` again caused ``named`` to crash. This has been fixed.
:gl:`#1608`
- The resolver could hang on shutdown due to dispatch resources not being
cleaned up when a TCP connection was reset. This has been fixed. :gl:`#3026`

View File

@ -134,6 +134,8 @@ struct dns_dispatch {
unsigned int requests; /*%< how many requests we have */
unsigned int tcpbuffers; /*%< allocated buffers */
unsigned int timedout;
};
#define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ')
@ -595,6 +597,134 @@ done:
dispentry_detach(&resp);
}
static isc_result_t
tcp_recv_timeout(dns_dispatch_t *disp, dns_dispentry_t **respp) {
dns_dispentry_t *resp = ISC_LIST_HEAD(disp->active);
if (resp != NULL) {
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
ISC_LIST_UNLINK(disp->active, resp, alink);
ISC_LIST_APPEND(disp->active, resp, alink);
disp->timedout++;
*respp = resp;
return (ISC_R_TIMEDOUT);
}
return (ISC_R_NOTFOUND);
}
static isc_result_t
tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid,
isc_sockaddr_t *peer, dns_dispentry_t **respp) {
isc_buffer_t source;
dns_messageid_t id;
unsigned int flags;
unsigned int bucket;
isc_result_t result = ISC_R_SUCCESS;
dns_dispentry_t *resp = NULL;
dispatch_log(disp, LVL(90), "success, length == %d, addr = %p",
region->length, region->base);
/*
* Peek into the buffer to see what we can see.
*/
isc_buffer_init(&source, region->base, region->length);
isc_buffer_add(&source, region->length);
result = dns_message_peekheader(&source, &id, &flags);
if (result != ISC_R_SUCCESS) {
dispatch_log(disp, LVL(10), "got garbage packet");
return (ISC_R_UNEXPECTED);
}
dispatch_log(disp, LVL(92),
"got valid DNS message header, /QR %c, id %u",
(((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
/*
* Look at the message flags. If it's a query, ignore it and keep
* reading.
*/
if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
dispatch_log(disp, LVL(10), "got DNS query instead of answer");
return (ISC_R_UNEXPECTED);
}
/*
* We have a valid response; find the associated dispentry object
* and call the caller back.
*/
bucket = dns_hash(qid, peer, id, disp->localport);
LOCK(&qid->lock);
resp = entry_search(qid, peer, id, disp->localport, bucket);
if (resp != NULL) {
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
*respp = resp;
} else if (disp->timedout > 0) {
/* There was active query that timed-out before */
disp->timedout--;
resp = ISC_LIST_HEAD(disp->active);
if (resp != NULL) {
/*
* It's a DNS response, but didn't match any outstanding
* queries. It's possible we would have timed out by
* now, but non-matching responses prevented it, so we
* check the age of the oldest active resp.
*/
int timeout = resp->timeout - dispentry_runtime(resp);
if (timeout <= 0) {
result = tcp_recv_timeout(disp, respp);
}
} else {
result = ISC_R_NOTFOUND;
}
} else {
/* We are not expecting this DNS message */
result = ISC_R_UNEXPECTED;
}
dispatch_log(disp, LVL(90), "search for response in bucket %d: %s",
bucket, isc_result_totext(result));
UNLOCK(&qid->lock);
return (result);
}
static void
tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps) {
dns_dispentry_t *resp = NULL, *next = NULL;
/*
* If there are any active responses, shut them all down.
*/
for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) {
next = ISC_LIST_NEXT(resp, alink);
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
ISC_LIST_UNLINK(disp->active, resp, alink);
ISC_LIST_APPEND(*resps, resp, rlink);
}
}
static void
tcp_recv_done(dns_dispentry_t *resp, isc_result_t eresult,
isc_region_t *region) {
resp->response(eresult, region, resp->arg);
dispentry_detach(&resp);
}
static void
tcp_recv_cancelall(dns_displist_t *resps, isc_region_t *region) {
dns_dispentry_t *resp = NULL, *next = NULL;
for (resp = ISC_LIST_HEAD(*resps); resp != NULL; resp = next) {
next = ISC_LIST_NEXT(resp, rlink);
ISC_LIST_UNLINK(*resps, resp, rlink);
resp->response(ISC_R_SHUTTINGDOWN, region, resp->arg);
dispentry_detach(&resp);
}
}
/*
* General flow:
*
@ -611,18 +741,12 @@ done:
* restart.
*/
static void
tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
void *arg) {
dns_dispatch_t *disp = (dns_dispatch_t *)arg;
dns_dispentry_t *resp = NULL, *next = NULL;
dns_messageid_t id;
isc_result_t dres;
unsigned int flags;
unsigned int bucket;
dns_dispentry_t *resp = NULL;
dns_qid_t *qid = NULL;
int level;
char buf[ISC_SOCKADDR_FORMATSIZE];
isc_buffer_t source;
isc_sockaddr_t peer;
dns_displist_t resps;
@ -630,36 +754,26 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
qid = disp->mgr->qid;
ISC_LIST_INIT(resps);
LOCK(&disp->lock);
dispatch_log(disp, LVL(90), "TCP read:%s:requests %d, buffers %d",
isc_result_totext(eresult), disp->requests,
isc_result_totext(result), disp->requests,
disp->tcpbuffers);
peer = isc_nmhandle_peeraddr(handle);
ISC_LIST_INIT(resps);
switch (eresult) {
case ISC_R_SUCCESS:
/* got our answer */
break;
switch (result) {
case ISC_R_SHUTTINGDOWN:
case ISC_R_CANCELED:
case ISC_R_EOF:
dispatch_log(disp, LVL(90), "shutting down: %s",
isc_result_totext(eresult));
/*
* If there are any active responses, shut them all down.
*/
for (resp = ISC_LIST_HEAD(disp->active); resp != NULL;
resp = next) {
next = ISC_LIST_NEXT(resp, alink);
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
ISC_LIST_UNLINK(disp->active, resp, alink);
ISC_LIST_APPEND(resps, resp, rlink);
}
goto done;
case ISC_R_CONNECTIONRESET:
isc_sockaddr_format(&peer, buf, sizeof(buf));
dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf,
isc_result_totext(result));
tcp_recv_shutdown(disp, &resps);
break;
case ISC_R_TIMEDOUT:
/*
@ -668,90 +782,55 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
* active queue immediately, though, because the callback
* might decide to keep waiting and leave it active.)
*/
resp = ISC_LIST_HEAD(disp->active);
if (resp != NULL) {
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
ISC_LIST_UNLINK(disp->active, resp, alink);
ISC_LIST_APPEND(disp->active, resp, alink);
}
goto done;
result = tcp_recv_timeout(disp, &resp);
break;
case ISC_R_SUCCESS:
/* We got an answer */
result = tcp_recv_success(disp, region, qid, &peer, &resp);
if (result != ISC_R_UNEXPECTED) {
/*
* It's a valid DNS response, which may or may not
* have matched an outstanding query.
*/
dispatch_getnext(disp, NULL, -1);
break;
}
/* Got an invalid DNS response, terminate the connection */
/* FALLTHROUGH */
default:
if (eresult == ISC_R_CONNECTIONRESET) {
level = ISC_LOG_INFO;
} else {
level = ISC_LOG_ERROR;
}
isc_sockaddr_format(&peer, buf, sizeof(buf));
dispatch_log(disp, level,
dispatch_log(disp, ISC_LOG_ERROR,
"shutting down due to TCP "
"receive error: %s: %s",
buf, isc_result_totext(eresult));
goto done;
buf, isc_result_totext(result));
tcp_recv_shutdown(disp, &resps);
break;
}
dispatch_log(disp, LVL(90), "success, length == %d, addr = %p",
region->length, region->base);
/*
* Peek into the buffer to see what we can see.
*/
isc_buffer_init(&source, region->base, region->length);
isc_buffer_add(&source, region->length);
dres = dns_message_peekheader(&source, &id, &flags);
if (dres != ISC_R_SUCCESS) {
dispatch_log(disp, LVL(10), "got garbage packet");
goto next;
}
dispatch_log(disp, LVL(92),
"got valid DNS message header, /QR %c, id %u",
(((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id);
/*
* Look at the message flags. If it's a query, ignore it
* and keep reading.
*/
if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
/*
* Query.
*/
goto next;
}
/*
* We have a valid response; find the associated dispentry object
* and call the caller back.
*/
bucket = dns_hash(qid, &peer, id, disp->localport);
LOCK(&qid->lock);
resp = entry_search(qid, &peer, id, disp->localport, bucket);
if (resp != NULL) {
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
}
dispatch_log(disp, LVL(90), "search for response in bucket %d: %s",
bucket, (resp == NULL ? "not found" : "found"));
UNLOCK(&qid->lock);
next:
dispatch_getnext(disp, NULL, -1);
done:
UNLOCK(&disp->lock);
if (resp != NULL) {
/* We got a matching response, or timed out */
resp->response(eresult, region, resp->arg);
dispentry_detach(&resp);
} else {
/* We're being shut down; cancel all outstanding resps */
for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) {
next = ISC_LIST_NEXT(resp, rlink);
ISC_LIST_UNLINK(resps, resp, rlink);
resp->response(ISC_R_SHUTTINGDOWN, region, resp->arg);
dispentry_detach(&resp);
}
switch (result) {
case ISC_R_SUCCESS:
case ISC_R_TIMEDOUT:
/*
* Either we found a matching response, or we timed out
* and canceled the oldest resp.
*/
INSIST(resp != NULL);
tcp_recv_done(resp, result, region);
break;
case ISC_R_NOTFOUND:
/*
* Either we got a response that didn't match any active
* resps, or we timed out but there *were* no active resps.
*/
break;
default:
/* We're being shut down; cancel all outstanding resps. */
tcp_recv_cancelall(&resps, region);
}
dns_dispatch_detach(&disp);
@ -1442,6 +1521,8 @@ dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout) {
switch (disp->socktype) {
case isc_socktype_udp:
REQUIRE(resp != NULL);
dispentry_attach(resp, &(dns_dispentry_t *){ NULL });
if (timeout > 0) {
isc_nmhandle_settimeout(resp->handle, timeout);

View File

@ -228,6 +228,11 @@
./bin/tests/system/digdelv/setup.sh SH 2018,2019,2020,2021
./bin/tests/system/digdelv/tests.sh SH 2015,2016,2017,2018,2019,2020,2021
./bin/tests/system/digdelv/yamlget.py PYTHON 2019,2020,2021
./bin/tests/system/dispatch/ans3/ans.py PYTHON 2021
./bin/tests/system/dispatch/clean.sh SH 2021
./bin/tests/system/dispatch/conftest.py PYTHON 2021
./bin/tests/system/dispatch/setup.sh SH 2021
./bin/tests/system/dispatch/tests-connreset.py PYTHON-BIN 2021
./bin/tests/system/ditch.pl PERL 2015,2016,2018,2019,2020,2021
./bin/tests/system/dlzexternal/clean.sh SH 2010,2012,2014,2015,2016,2018,2019,2020,2021
./bin/tests/system/dlzexternal/driver/driver.c C 2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021