diff --git a/CHANGES b/CHANGES index b6167e471c..d76f0d8c69 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +5746. [bug] A lame server delegation could lead to a loop in which + a resolver fetch depends on an ADB find which depends + on the same resolver fetch. Previously, this would + cause the fetch to hang until timing out, but after + change #5730 it would hang forever. The condition is + now detected and avoided. [GL #2927] + 5745. [bug] Fetch context objects now use attach/detach semantics to make it easier to find and debug reference-counting errors, and several such errors diff --git a/bin/tests/system/resolver/ans2/ans.pl b/bin/tests/system/resolver/ans2/ans.pl index cc923e3dfb..d9f6f53508 100644 --- a/bin/tests/system/resolver/ans2/ans.pl +++ b/bin/tests/system/resolver/ans2/ans.pl @@ -102,6 +102,11 @@ for (;;) { } elsif ($qname =~ /example\.net/) { $packet->push("authority", new Net::DNS::RR("example.net 300 NS ns.example.net")); $packet->push("additional", new Net::DNS::RR("ns.example.net 300 A 10.53.0.3")); + } elsif ($qname =~ /lame\.example\.org/) { + $packet->header->ad(0); + $packet->header->aa(0); + $packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org")); + $packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3")); } elsif ($qname =~ /sub\.example\.org/) { # Data for CNAME/DNAME filtering. The final answers are # expected to be accepted regardless of the filter setting. diff --git a/bin/tests/system/resolver/ans3/ans.pl b/bin/tests/system/resolver/ans3/ans.pl index be8708ddf5..15d4c98780 100644 --- a/bin/tests/system/resolver/ans3/ans.pl +++ b/bin/tests/system/resolver/ans3/ans.pl @@ -98,6 +98,11 @@ for (;;) { } elsif ($qname =~ /^nxdomain\.example\.net$/i) { $packet->header->aa(1); $packet->header->rcode(NXDOMAIN); + } elsif ($qname =~ /lame\.example\.org/) { + $packet->header->ad(0); + $packet->header->aa(0); + $packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org")); + $packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3")); } elsif ($qname eq "cname.sub.example.org") { $packet->push("answer", new Net::DNS::RR($qname . diff --git a/bin/tests/system/resolver/tests.sh b/bin/tests/system/resolver/tests.sh index 165b196b70..d8485f29ed 100755 --- a/bin/tests/system/resolver/tests.sh +++ b/bin/tests/system/resolver/tests.sh @@ -889,5 +889,13 @@ grep 'cname-next\.example\.net\..*CNAME.http-server\.example\.net\.' dig.out.ns7 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` +echo_i "check ADB find loops are detected ($n)" +ret=0 +$DIG $DIGOPTS +tcp +tries=1 +timeout=5 @10.53.0.1 fake.lame.example.org > dig.out.ns1.${n} || ret=1 +grep "status: SERVFAIL" dig.out.ns1.${n} > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=`expr $status + $ret` + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 98874357fd..28127fb2e7 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -112,3 +112,8 @@ Bug Fixes - Logfiles using ``timestamp``-style suffixes were not always correctly removed when the number of files exceeded the limit set by ``versions``. :gl:`#828` + +- Some lame delegations could trigger a dependency loop, in which a + resolver fetch was waiting for a name server address lookup which was + waiting for the same resolver fetch. This could cause a recursive lookup + to hang until timing out. This now detected and avoided. :gl:`#2927` diff --git a/lib/dns/adb.c b/lib/dns/adb.c index a5fbee0a0b..27bce060b8 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -407,14 +407,10 @@ log_quota(dns_adbentry_t *entry, const char *fmt, ...) ISC_FORMAT_PRINTF(2, 3); * Fetches are broken out into A and AAAA types. In some cases, * however, it makes more sense to test for a particular class of fetches, * like V4 or V6 above. - * Note: since we have removed the support of A6 in adb, FETCH_A and FETCH_AAAA - * are now equal to FETCH_V4 and FETCH_V6, respectively. */ #define NAME_FETCH_A(n) ((n)->fetch_a != NULL) #define NAME_FETCH_AAAA(n) ((n)->fetch_aaaa != NULL) -#define NAME_FETCH_V4(n) (NAME_FETCH_A(n)) -#define NAME_FETCH_V6(n) (NAME_FETCH_AAAA(n)) -#define NAME_FETCH(n) (NAME_FETCH_V4(n) || NAME_FETCH_V6(n)) +#define NAME_FETCH(n) (NAME_FETCH_A(n) || NAME_FETCH_AAAA(n)) /* * Find options and tests to see if there are addresses on the list. @@ -898,18 +894,18 @@ static isc_result_t import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, isc_stdtime_t now) { isc_result_t result; - dns_adb_t *adb; - dns_adbnamehook_t *nh; - dns_adbnamehook_t *anh; + dns_adb_t *adb = NULL; + dns_adbnamehook_t *nh = NULL; + dns_adbnamehook_t *anh = NULL; dns_rdata_t rdata = DNS_RDATA_INIT; struct in_addr ina; struct in6_addr in6a; isc_sockaddr_t sockaddr; - dns_adbentry_t *foundentry; /* NO CLEAN UP! */ + dns_adbentry_t *foundentry = NULL; /* NO CLEAN UP! */ int addr_bucket; bool new_addresses_added; dns_rdatatype_t rdtype; - dns_adbnamehooklist_t *hookhead; + dns_adbnamehooklist_t *hookhead = NULL; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; @@ -921,7 +917,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, addr_bucket = DNS_ADB_INVALIDBUCKET; new_addresses_added = false; - nh = NULL; result = dns_rdataset_first(rdataset); while (result == ISC_R_SUCCESS) { dns_rdata_reset(&rdata); @@ -978,10 +973,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, result = dns_rdataset_next(rdataset); } - if (nh != NULL) { - free_adbnamehook(adb, &nh); - } - if (addr_bucket != DNS_ADB_INVALIDBUCKET) { UNLOCK(&adb->entrylocks[addr_bucket]); } @@ -1103,7 +1094,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) { /* * Check to see if we need to remove the v4 addresses */ - if (!NAME_FETCH_V4(name) && EXPIRE_OK(name->expire_v4, now)) { + if (!NAME_FETCH_A(name) && EXPIRE_OK(name->expire_v4, now)) { if (NAME_HAS_V4(name)) { DP(DEF_LEVEL, "expiring v4 for name %p", name); result4 = clean_namehooks(adb, &name->v4); @@ -1116,7 +1107,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) { /* * Check to see if we need to remove the v6 addresses */ - if (!NAME_FETCH_V6(name) && EXPIRE_OK(name->expire_v6, now)) { + if (!NAME_FETCH_AAAA(name) && EXPIRE_OK(name->expire_v6, now)) { if (NAME_HAS_V6(name)) { DP(DEF_LEVEL, "expiring v6 for name %p", name); result6 = clean_namehooks(adb, &name->v6); @@ -2867,14 +2858,17 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, isc_stdtime_t now, dns_name_t *target, in_port_t port, unsigned int depth, isc_counter_t *qc, dns_adbfind_t **findp) { - dns_adbfind_t *find; - dns_adbname_t *adbname; + dns_adbfind_t *find = NULL; + dns_adbname_t *adbname = NULL; int bucket; - bool want_event, start_at_zone, alias, have_address; + bool want_event = true; + bool start_at_zone = false; + bool alias = false; + bool have_address = false; isc_result_t result; - unsigned int wanted_addresses; - unsigned int wanted_fetches; - unsigned int query_pending; + unsigned int wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK); + unsigned int wanted_fetches = 0; + unsigned int query_pending = 0; char namebuf[DNS_NAME_FORMATSIZE]; REQUIRE(DNS_ADB_VALID(adb)); @@ -2890,12 +2884,6 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, result = ISC_R_UNEXPECTED; POST(result); - wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK); - wanted_fetches = 0; - query_pending = 0; - want_event = false; - start_at_zone = false; - alias = false; if (now == 0) { isc_stdtime_get(&now); @@ -3043,7 +3031,7 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, goto v6; } - if (!NAME_FETCH_V4(adbname)) { + if (!NAME_FETCH_A(adbname)) { wanted_fetches |= DNS_ADBFIND_INET; } } @@ -3079,7 +3067,7 @@ v6: goto fetch; } - if (!NAME_FETCH_V6(adbname)) { + if (!NAME_FETCH_AAAA(adbname)) { wanted_fetches |= DNS_ADBFIND_INET6; } } @@ -3140,10 +3128,10 @@ fetch: copy_namehook_lists(adb, find, qname, qtype, adbname, now); post_copy: - if (NAME_FETCH_V4(adbname)) { + if (NAME_FETCH_A(adbname)) { query_pending |= DNS_ADBFIND_INET; } - if (NAME_FETCH_V6(adbname)) { + if (NAME_FETCH_AAAA(adbname)) { query_pending |= DNS_ADBFIND_INET6; } @@ -3151,7 +3139,6 @@ post_copy: * Attach to the name's query list if there are queries * already running, and we have been asked to. */ - want_event = true; if (!FIND_WANTEVENT(find)) { want_event = false; } @@ -3165,16 +3152,15 @@ post_copy: want_event = false; } if (want_event) { + bool empty; find->adbname = adbname; find->name_bucket = bucket; - bool empty = ISC_LIST_EMPTY(adbname->finds); + empty = ISC_LIST_EMPTY(adbname->finds); ISC_LIST_APPEND(adbname->finds, find, plink); find->query_pending = (query_pending & wanted_addresses); find->flags &= ~DNS_ADBFIND_ADDRESSMASK; find->flags |= (find->query_pending & DNS_ADBFIND_ADDRESSMASK); - DP(DEF_LEVEL, - "createfind: attaching find %p to adbname " - "%p %d", + DP(DEF_LEVEL, "createfind: attaching find %p to adbname %p %d", find, adbname, empty); } else { /* @@ -3207,22 +3193,20 @@ post_copy: out: if (find != NULL) { - *findp = find; - if (want_event) { - isc_task_t *taskp; + isc_task_t *taskp = NULL; INSIST((find->flags & DNS_ADBFIND_ADDRESSMASK) != 0); - taskp = NULL; isc_task_attach(task, &taskp); find->event.ev_sender = taskp; find->event.ev_action = action; find->event.ev_arg = arg; } + + *findp = find; } UNLOCK(&adb->namelocks[bucket]); - return (result); } @@ -4011,8 +3995,8 @@ fetch_name(dns_adbname_t *adbname, bool start_at_zone, unsigned int depth, adb = adbname->adb; INSIST(DNS_ADB_VALID(adb)); - INSIST((type == dns_rdatatype_a && !NAME_FETCH_V4(adbname)) || - (type == dns_rdatatype_aaaa && !NAME_FETCH_V6(adbname))); + INSIST((type == dns_rdatatype_a && !NAME_FETCH_A(adbname)) || + (type == dns_rdatatype_aaaa && !NAME_FETCH_AAAA(adbname))); adbname->fetch_err = FIND_ERR_NOTFOUND; diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 68839ae57f..eb752e224b 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -408,8 +408,8 @@ struct fetchctx { /*% * Fetch-local statistics for detailed logging. */ - isc_result_t result; /*%< fetch result */ - isc_result_t vresult; /*%< validation result */ + isc_result_t result; /*%< fetch result */ + isc_result_t vresult; /*%< validation result */ int exitline; isc_time_t start; uint64_t duration; @@ -1786,7 +1786,7 @@ resquery_senddone(isc_result_t eresult, isc_region_t *region, void *arg) { case ISC_R_NOPERM: case ISC_R_ADDRNOTAVAIL: case ISC_R_CONNREFUSED: - /* No route to remote. */ + /* No route to remote. */ FCTXTRACE3("query canceled in resquery_senddone(): " "no route to host; no response", eresult); @@ -3209,20 +3209,51 @@ sort_finds(dns_adbfindlist_t *findlist, unsigned int bias) { *findlist = sorted; } +/* + * Return true iff the ADB find has a pending fetch for 'type'. This is + * used to find out whether we're in a loop, where a fetch is waiting for a + * find which is waiting for that same fetch. + * + * Note: This could be done with either an equivalence check (e.g., + * query_pending == DNS_ADBFIND_INET) or with a bit check, as below. If + * we checked for equivalence, that would mean we could only detect a loop + * when there is exactly one pending fetch, and we're it. If there were + * pending fetches for *both* address families, then a loop would be + * undetected. + * + * However, using a bit check means that in theory, an ADB find might be + * aborted that could have succeeded, if the other fetch had returned an + * answer. + * + * Since there's a good chance the server is broken and won't answer either + * query, and since an ADB find with two pending fetches is a very rare + * occurrance anyway, we regard this theoretical SERVFAIL as the lesser + * evil. + */ +static inline bool +waiting_for(dns_adbfind_t *find, dns_rdatatype_t type) { + switch (type) { + case dns_rdatatype_a: + return ((find->query_pending & DNS_ADBFIND_INET) != 0); + case dns_rdatatype_aaaa: + return ((find->query_pending & DNS_ADBFIND_INET6) != 0); + default: + return (false); + } +} + static void findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, unsigned int options, unsigned int flags, isc_stdtime_t now, bool *overquota, bool *need_alternate, unsigned int *no_addresses) { - dns_adbaddrinfo_t *ai; - dns_adbfind_t *find; - dns_resolver_t *res; - bool unshared; + dns_adbaddrinfo_t *ai = NULL; + dns_adbfind_t *find = NULL; + dns_resolver_t *res = fctx->res; + bool unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0); isc_result_t result; - fetchctx_t *ev_fctx = NULL; FCTXTRACE("FINDNAME"); - res = fctx->res; - unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0); + /* * If this name is a subdomain of the query domain, tell * the ADB to start looking using zone/hint data. This keeps us @@ -3239,11 +3270,10 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, /* * See what we know about this address. */ - find = NULL; - fctx_attach(fctx, &ev_fctx); + fctx_attach(fctx, &(fetchctx_t *){ NULL }); result = dns_adb_createfind( fctx->adb, res->buckets[fctx->bucketnum].task, fctx_finddone, - ev_fctx, name, fctx->name, fctx->type, options, now, NULL, + fctx, name, fctx->name, fctx->type, options, now, NULL, res->view->dstport, fctx->depth + 1, fctx->qc, &find); isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, @@ -3267,7 +3297,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, "is a CNAME, while resolving '%s'", namebuf, fctx->info); } - fctx_detach(&ev_fctx); + fctx_detach(&fctx); return; } @@ -3298,12 +3328,33 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, /* * We don't know any of the addresses for this name. + * + * The find may be waiting on a resolver fetch for a server + * address. We need to make sure it isn't waiting on *this* + * fetch, because if it is, we won't be answering it and it + * won't be answering us. + */ + if (waiting_for(find, fctx->type) && dns_name_equal(name, fctx->name)) { + fctx->adberr++; + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, + DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO, + "loop detected resolving '%s'", fctx->info); + + if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { + atomic_fetch_add_relaxed(&fctx->pending, 1); + dns_adb_cancelfind(find); + } else { + dns_adb_destroyfind(&find); + fctx_detach(&fctx); + } + return; + } + + /* + * We may be waiting for another fetch to complete, and + * we'll get an event later when the find has what it needs. */ if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { - /* - * We're looking for them and will get an - * event about it later. - */ atomic_fetch_add_relaxed(&fctx->pending, 1); /* @@ -3323,17 +3374,18 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, return; } + /* + * No addresses and no pending events: the find failed. + */ if ((find->options & DNS_ADBFIND_OVERQUOTA) != 0) { if (overquota != NULL) { *overquota = true; } fctx->quotacount++; /* quota exceeded */ } else if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) { - fctx->lamecount++; /* cached lame server - */ + fctx->lamecount++; /* cached lame server */ } else { - fctx->adberr++; /* unreachable server, - etc. */ + fctx->adberr++; /* unreachable server, etc. */ } /* @@ -3347,7 +3399,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, *need_alternate = true; } dns_adb_destroyfind(&find); - fctx_detach(&ev_fctx); + fctx_detach(&fctx); } static bool