2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 14:07:59 +00:00

Merge branch '2927-lame-server-loop' into 'main'

check for loops in ADB finds

Closes #2927

See merge request isc-projects/bind9!5474
This commit is contained in:
Evan Hunt
2021-10-21 09:26:44 +00:00
7 changed files with 134 additions and 68 deletions

View File

@@ -1,3 +1,10 @@
5746. [bug] A lame server delegation could lead to a loop in which
a resolver fetch depends on an ADB find which depends
on the same resolver fetch. Previously, this would
cause the fetch to hang until timing out, but after
change #5730 it would hang forever. The condition is
now detected and avoided. [GL #2927]
5745. [bug] Fetch context objects now use attach/detach
semantics to make it easier to find and debug
reference-counting errors, and several such errors

View File

@@ -102,6 +102,11 @@ for (;;) {
} elsif ($qname =~ /example\.net/) {
$packet->push("authority", new Net::DNS::RR("example.net 300 NS ns.example.net"));
$packet->push("additional", new Net::DNS::RR("ns.example.net 300 A 10.53.0.3"));
} elsif ($qname =~ /lame\.example\.org/) {
$packet->header->ad(0);
$packet->header->aa(0);
$packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org"));
$packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3"));
} elsif ($qname =~ /sub\.example\.org/) {
# Data for CNAME/DNAME filtering. The final answers are
# expected to be accepted regardless of the filter setting.

View File

@@ -98,6 +98,11 @@ for (;;) {
} elsif ($qname =~ /^nxdomain\.example\.net$/i) {
$packet->header->aa(1);
$packet->header->rcode(NXDOMAIN);
} elsif ($qname =~ /lame\.example\.org/) {
$packet->header->ad(0);
$packet->header->aa(0);
$packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org"));
$packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3"));
} elsif ($qname eq "cname.sub.example.org") {
$packet->push("answer",
new Net::DNS::RR($qname .

View File

@@ -889,5 +889,13 @@ grep 'cname-next\.example\.net\..*CNAME.http-server\.example\.net\.' dig.out.ns7
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check ADB find loops are detected ($n)"
ret=0
$DIG $DIGOPTS +tcp +tries=1 +timeout=5 @10.53.0.1 fake.lame.example.org > dig.out.ns1.${n} || ret=1
grep "status: SERVFAIL" dig.out.ns1.${n} > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
echo_i "exit status: $status"
[ $status -eq 0 ] || exit 1

View File

@@ -112,3 +112,8 @@ Bug Fixes
- Logfiles using ``timestamp``-style suffixes were not always correctly
removed when the number of files exceeded the limit set by ``versions``.
:gl:`#828`
- Some lame delegations could trigger a dependency loop, in which a
resolver fetch was waiting for a name server address lookup which was
waiting for the same resolver fetch. This could cause a recursive lookup
to hang until timing out. This now detected and avoided. :gl:`#2927`

View File

@@ -407,14 +407,10 @@ log_quota(dns_adbentry_t *entry, const char *fmt, ...) ISC_FORMAT_PRINTF(2, 3);
* Fetches are broken out into A and AAAA types. In some cases,
* however, it makes more sense to test for a particular class of fetches,
* like V4 or V6 above.
* Note: since we have removed the support of A6 in adb, FETCH_A and FETCH_AAAA
* are now equal to FETCH_V4 and FETCH_V6, respectively.
*/
#define NAME_FETCH_A(n) ((n)->fetch_a != NULL)
#define NAME_FETCH_AAAA(n) ((n)->fetch_aaaa != NULL)
#define NAME_FETCH_V4(n) (NAME_FETCH_A(n))
#define NAME_FETCH_V6(n) (NAME_FETCH_AAAA(n))
#define NAME_FETCH(n) (NAME_FETCH_V4(n) || NAME_FETCH_V6(n))
#define NAME_FETCH(n) (NAME_FETCH_A(n) || NAME_FETCH_AAAA(n))
/*
* Find options and tests to see if there are addresses on the list.
@@ -898,18 +894,18 @@ static isc_result_t
import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset,
isc_stdtime_t now) {
isc_result_t result;
dns_adb_t *adb;
dns_adbnamehook_t *nh;
dns_adbnamehook_t *anh;
dns_adb_t *adb = NULL;
dns_adbnamehook_t *nh = NULL;
dns_adbnamehook_t *anh = NULL;
dns_rdata_t rdata = DNS_RDATA_INIT;
struct in_addr ina;
struct in6_addr in6a;
isc_sockaddr_t sockaddr;
dns_adbentry_t *foundentry; /* NO CLEAN UP! */
dns_adbentry_t *foundentry = NULL; /* NO CLEAN UP! */
int addr_bucket;
bool new_addresses_added;
dns_rdatatype_t rdtype;
dns_adbnamehooklist_t *hookhead;
dns_adbnamehooklist_t *hookhead = NULL;
INSIST(DNS_ADBNAME_VALID(adbname));
adb = adbname->adb;
@@ -921,7 +917,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset,
addr_bucket = DNS_ADB_INVALIDBUCKET;
new_addresses_added = false;
nh = NULL;
result = dns_rdataset_first(rdataset);
while (result == ISC_R_SUCCESS) {
dns_rdata_reset(&rdata);
@@ -978,10 +973,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset,
result = dns_rdataset_next(rdataset);
}
if (nh != NULL) {
free_adbnamehook(adb, &nh);
}
if (addr_bucket != DNS_ADB_INVALIDBUCKET) {
UNLOCK(&adb->entrylocks[addr_bucket]);
}
@@ -1103,7 +1094,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) {
/*
* Check to see if we need to remove the v4 addresses
*/
if (!NAME_FETCH_V4(name) && EXPIRE_OK(name->expire_v4, now)) {
if (!NAME_FETCH_A(name) && EXPIRE_OK(name->expire_v4, now)) {
if (NAME_HAS_V4(name)) {
DP(DEF_LEVEL, "expiring v4 for name %p", name);
result4 = clean_namehooks(adb, &name->v4);
@@ -1116,7 +1107,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) {
/*
* Check to see if we need to remove the v6 addresses
*/
if (!NAME_FETCH_V6(name) && EXPIRE_OK(name->expire_v6, now)) {
if (!NAME_FETCH_AAAA(name) && EXPIRE_OK(name->expire_v6, now)) {
if (NAME_HAS_V6(name)) {
DP(DEF_LEVEL, "expiring v6 for name %p", name);
result6 = clean_namehooks(adb, &name->v6);
@@ -2867,14 +2858,17 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action,
isc_stdtime_t now, dns_name_t *target, in_port_t port,
unsigned int depth, isc_counter_t *qc,
dns_adbfind_t **findp) {
dns_adbfind_t *find;
dns_adbname_t *adbname;
dns_adbfind_t *find = NULL;
dns_adbname_t *adbname = NULL;
int bucket;
bool want_event, start_at_zone, alias, have_address;
bool want_event = true;
bool start_at_zone = false;
bool alias = false;
bool have_address = false;
isc_result_t result;
unsigned int wanted_addresses;
unsigned int wanted_fetches;
unsigned int query_pending;
unsigned int wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK);
unsigned int wanted_fetches = 0;
unsigned int query_pending = 0;
char namebuf[DNS_NAME_FORMATSIZE];
REQUIRE(DNS_ADB_VALID(adb));
@@ -2890,12 +2884,6 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action,
result = ISC_R_UNEXPECTED;
POST(result);
wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK);
wanted_fetches = 0;
query_pending = 0;
want_event = false;
start_at_zone = false;
alias = false;
if (now == 0) {
isc_stdtime_get(&now);
@@ -3043,7 +3031,7 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action,
goto v6;
}
if (!NAME_FETCH_V4(adbname)) {
if (!NAME_FETCH_A(adbname)) {
wanted_fetches |= DNS_ADBFIND_INET;
}
}
@@ -3079,7 +3067,7 @@ v6:
goto fetch;
}
if (!NAME_FETCH_V6(adbname)) {
if (!NAME_FETCH_AAAA(adbname)) {
wanted_fetches |= DNS_ADBFIND_INET6;
}
}
@@ -3140,10 +3128,10 @@ fetch:
copy_namehook_lists(adb, find, qname, qtype, adbname, now);
post_copy:
if (NAME_FETCH_V4(adbname)) {
if (NAME_FETCH_A(adbname)) {
query_pending |= DNS_ADBFIND_INET;
}
if (NAME_FETCH_V6(adbname)) {
if (NAME_FETCH_AAAA(adbname)) {
query_pending |= DNS_ADBFIND_INET6;
}
@@ -3151,7 +3139,6 @@ post_copy:
* Attach to the name's query list if there are queries
* already running, and we have been asked to.
*/
want_event = true;
if (!FIND_WANTEVENT(find)) {
want_event = false;
}
@@ -3165,16 +3152,15 @@ post_copy:
want_event = false;
}
if (want_event) {
bool empty;
find->adbname = adbname;
find->name_bucket = bucket;
bool empty = ISC_LIST_EMPTY(adbname->finds);
empty = ISC_LIST_EMPTY(adbname->finds);
ISC_LIST_APPEND(adbname->finds, find, plink);
find->query_pending = (query_pending & wanted_addresses);
find->flags &= ~DNS_ADBFIND_ADDRESSMASK;
find->flags |= (find->query_pending & DNS_ADBFIND_ADDRESSMASK);
DP(DEF_LEVEL,
"createfind: attaching find %p to adbname "
"%p %d",
DP(DEF_LEVEL, "createfind: attaching find %p to adbname %p %d",
find, adbname, empty);
} else {
/*
@@ -3207,22 +3193,20 @@ post_copy:
out:
if (find != NULL) {
*findp = find;
if (want_event) {
isc_task_t *taskp;
isc_task_t *taskp = NULL;
INSIST((find->flags & DNS_ADBFIND_ADDRESSMASK) != 0);
taskp = NULL;
isc_task_attach(task, &taskp);
find->event.ev_sender = taskp;
find->event.ev_action = action;
find->event.ev_arg = arg;
}
*findp = find;
}
UNLOCK(&adb->namelocks[bucket]);
return (result);
}
@@ -4011,8 +3995,8 @@ fetch_name(dns_adbname_t *adbname, bool start_at_zone, unsigned int depth,
adb = adbname->adb;
INSIST(DNS_ADB_VALID(adb));
INSIST((type == dns_rdatatype_a && !NAME_FETCH_V4(adbname)) ||
(type == dns_rdatatype_aaaa && !NAME_FETCH_V6(adbname)));
INSIST((type == dns_rdatatype_a && !NAME_FETCH_A(adbname)) ||
(type == dns_rdatatype_aaaa && !NAME_FETCH_AAAA(adbname)));
adbname->fetch_err = FIND_ERR_NOTFOUND;

View File

@@ -3209,20 +3209,51 @@ sort_finds(dns_adbfindlist_t *findlist, unsigned int bias) {
*findlist = sorted;
}
/*
* Return true iff the ADB find has a pending fetch for 'type'. This is
* used to find out whether we're in a loop, where a fetch is waiting for a
* find which is waiting for that same fetch.
*
* Note: This could be done with either an equivalence check (e.g.,
* query_pending == DNS_ADBFIND_INET) or with a bit check, as below. If
* we checked for equivalence, that would mean we could only detect a loop
* when there is exactly one pending fetch, and we're it. If there were
* pending fetches for *both* address families, then a loop would be
* undetected.
*
* However, using a bit check means that in theory, an ADB find might be
* aborted that could have succeeded, if the other fetch had returned an
* answer.
*
* Since there's a good chance the server is broken and won't answer either
* query, and since an ADB find with two pending fetches is a very rare
* occurrance anyway, we regard this theoretical SERVFAIL as the lesser
* evil.
*/
static inline bool
waiting_for(dns_adbfind_t *find, dns_rdatatype_t type) {
switch (type) {
case dns_rdatatype_a:
return ((find->query_pending & DNS_ADBFIND_INET) != 0);
case dns_rdatatype_aaaa:
return ((find->query_pending & DNS_ADBFIND_INET6) != 0);
default:
return (false);
}
}
static void
findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
unsigned int options, unsigned int flags, isc_stdtime_t now,
bool *overquota, bool *need_alternate, unsigned int *no_addresses) {
dns_adbaddrinfo_t *ai;
dns_adbfind_t *find;
dns_resolver_t *res;
bool unshared;
dns_adbaddrinfo_t *ai = NULL;
dns_adbfind_t *find = NULL;
dns_resolver_t *res = fctx->res;
bool unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
isc_result_t result;
fetchctx_t *ev_fctx = NULL;
FCTXTRACE("FINDNAME");
res = fctx->res;
unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
/*
* If this name is a subdomain of the query domain, tell
* the ADB to start looking using zone/hint data. This keeps us
@@ -3239,11 +3270,10 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
/*
* See what we know about this address.
*/
find = NULL;
fctx_attach(fctx, &ev_fctx);
fctx_attach(fctx, &(fetchctx_t *){ NULL });
result = dns_adb_createfind(
fctx->adb, res->buckets[fctx->bucketnum].task, fctx_finddone,
ev_fctx, name, fctx->name, fctx->type, options, now, NULL,
fctx, name, fctx->name, fctx->type, options, now, NULL,
res->view->dstport, fctx->depth + 1, fctx->qc, &find);
isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
@@ -3267,7 +3297,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
"is a CNAME, while resolving '%s'",
namebuf, fctx->info);
}
fctx_detach(&ev_fctx);
fctx_detach(&fctx);
return;
}
@@ -3298,12 +3328,33 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
/*
* We don't know any of the addresses for this name.
*
* The find may be waiting on a resolver fetch for a server
* address. We need to make sure it isn't waiting on *this*
* fetch, because if it is, we won't be answering it and it
* won't be answering us.
*/
if (waiting_for(find, fctx->type) && dns_name_equal(name, fctx->name)) {
fctx->adberr++;
isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
"loop detected resolving '%s'", fctx->info);
if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
atomic_fetch_add_relaxed(&fctx->pending, 1);
dns_adb_cancelfind(find);
} else {
dns_adb_destroyfind(&find);
fctx_detach(&fctx);
}
return;
}
/*
* We may be waiting for another fetch to complete, and
* we'll get an event later when the find has what it needs.
*/
if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
/*
* We're looking for them and will get an
* event about it later.
*/
atomic_fetch_add_relaxed(&fctx->pending, 1);
/*
@@ -3323,17 +3374,18 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
return;
}
/*
* No addresses and no pending events: the find failed.
*/
if ((find->options & DNS_ADBFIND_OVERQUOTA) != 0) {
if (overquota != NULL) {
*overquota = true;
}
fctx->quotacount++; /* quota exceeded */
} else if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) {
fctx->lamecount++; /* cached lame server
*/
fctx->lamecount++; /* cached lame server */
} else {
fctx->adberr++; /* unreachable server,
etc. */
fctx->adberr++; /* unreachable server, etc. */
}
/*
@@ -3347,7 +3399,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
*need_alternate = true;
}
dns_adb_destroyfind(&find);
fctx_detach(&ev_fctx);
fctx_detach(&fctx);
}
static bool