From 93f5bc893eca1db47f42fce8b3aae3101dd5a772 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Mon, 18 Oct 2021 17:57:58 -0700 Subject: [PATCH 1/4] incidental cleanup The NAME_FETCH_A and NAME_FETCH_AAAA macros were meant to be boolean, indicating whether the pointers were set or not, while the NAME_FETCH_V4 and NAME_FETCH_V6 macros were meant to return the pointer values. The latter were only used as booleans, so they've been removed in favor of the former. Also did some style cleanup and removed an unreachable code block. --- lib/dns/adb.c | 74 ++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/lib/dns/adb.c b/lib/dns/adb.c index a5fbee0a0b..27bce060b8 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -407,14 +407,10 @@ log_quota(dns_adbentry_t *entry, const char *fmt, ...) ISC_FORMAT_PRINTF(2, 3); * Fetches are broken out into A and AAAA types. In some cases, * however, it makes more sense to test for a particular class of fetches, * like V4 or V6 above. - * Note: since we have removed the support of A6 in adb, FETCH_A and FETCH_AAAA - * are now equal to FETCH_V4 and FETCH_V6, respectively. */ #define NAME_FETCH_A(n) ((n)->fetch_a != NULL) #define NAME_FETCH_AAAA(n) ((n)->fetch_aaaa != NULL) -#define NAME_FETCH_V4(n) (NAME_FETCH_A(n)) -#define NAME_FETCH_V6(n) (NAME_FETCH_AAAA(n)) -#define NAME_FETCH(n) (NAME_FETCH_V4(n) || NAME_FETCH_V6(n)) +#define NAME_FETCH(n) (NAME_FETCH_A(n) || NAME_FETCH_AAAA(n)) /* * Find options and tests to see if there are addresses on the list. @@ -898,18 +894,18 @@ static isc_result_t import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, isc_stdtime_t now) { isc_result_t result; - dns_adb_t *adb; - dns_adbnamehook_t *nh; - dns_adbnamehook_t *anh; + dns_adb_t *adb = NULL; + dns_adbnamehook_t *nh = NULL; + dns_adbnamehook_t *anh = NULL; dns_rdata_t rdata = DNS_RDATA_INIT; struct in_addr ina; struct in6_addr in6a; isc_sockaddr_t sockaddr; - dns_adbentry_t *foundentry; /* NO CLEAN UP! */ + dns_adbentry_t *foundentry = NULL; /* NO CLEAN UP! */ int addr_bucket; bool new_addresses_added; dns_rdatatype_t rdtype; - dns_adbnamehooklist_t *hookhead; + dns_adbnamehooklist_t *hookhead = NULL; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; @@ -921,7 +917,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, addr_bucket = DNS_ADB_INVALIDBUCKET; new_addresses_added = false; - nh = NULL; result = dns_rdataset_first(rdataset); while (result == ISC_R_SUCCESS) { dns_rdata_reset(&rdata); @@ -978,10 +973,6 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, result = dns_rdataset_next(rdataset); } - if (nh != NULL) { - free_adbnamehook(adb, &nh); - } - if (addr_bucket != DNS_ADB_INVALIDBUCKET) { UNLOCK(&adb->entrylocks[addr_bucket]); } @@ -1103,7 +1094,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) { /* * Check to see if we need to remove the v4 addresses */ - if (!NAME_FETCH_V4(name) && EXPIRE_OK(name->expire_v4, now)) { + if (!NAME_FETCH_A(name) && EXPIRE_OK(name->expire_v4, now)) { if (NAME_HAS_V4(name)) { DP(DEF_LEVEL, "expiring v4 for name %p", name); result4 = clean_namehooks(adb, &name->v4); @@ -1116,7 +1107,7 @@ check_expire_namehooks(dns_adbname_t *name, isc_stdtime_t now) { /* * Check to see if we need to remove the v6 addresses */ - if (!NAME_FETCH_V6(name) && EXPIRE_OK(name->expire_v6, now)) { + if (!NAME_FETCH_AAAA(name) && EXPIRE_OK(name->expire_v6, now)) { if (NAME_HAS_V6(name)) { DP(DEF_LEVEL, "expiring v6 for name %p", name); result6 = clean_namehooks(adb, &name->v6); @@ -2867,14 +2858,17 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, isc_stdtime_t now, dns_name_t *target, in_port_t port, unsigned int depth, isc_counter_t *qc, dns_adbfind_t **findp) { - dns_adbfind_t *find; - dns_adbname_t *adbname; + dns_adbfind_t *find = NULL; + dns_adbname_t *adbname = NULL; int bucket; - bool want_event, start_at_zone, alias, have_address; + bool want_event = true; + bool start_at_zone = false; + bool alias = false; + bool have_address = false; isc_result_t result; - unsigned int wanted_addresses; - unsigned int wanted_fetches; - unsigned int query_pending; + unsigned int wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK); + unsigned int wanted_fetches = 0; + unsigned int query_pending = 0; char namebuf[DNS_NAME_FORMATSIZE]; REQUIRE(DNS_ADB_VALID(adb)); @@ -2890,12 +2884,6 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, result = ISC_R_UNEXPECTED; POST(result); - wanted_addresses = (options & DNS_ADBFIND_ADDRESSMASK); - wanted_fetches = 0; - query_pending = 0; - want_event = false; - start_at_zone = false; - alias = false; if (now == 0) { isc_stdtime_get(&now); @@ -3043,7 +3031,7 @@ dns_adb_createfind(dns_adb_t *adb, isc_task_t *task, isc_taskaction_t action, goto v6; } - if (!NAME_FETCH_V4(adbname)) { + if (!NAME_FETCH_A(adbname)) { wanted_fetches |= DNS_ADBFIND_INET; } } @@ -3079,7 +3067,7 @@ v6: goto fetch; } - if (!NAME_FETCH_V6(adbname)) { + if (!NAME_FETCH_AAAA(adbname)) { wanted_fetches |= DNS_ADBFIND_INET6; } } @@ -3140,10 +3128,10 @@ fetch: copy_namehook_lists(adb, find, qname, qtype, adbname, now); post_copy: - if (NAME_FETCH_V4(adbname)) { + if (NAME_FETCH_A(adbname)) { query_pending |= DNS_ADBFIND_INET; } - if (NAME_FETCH_V6(adbname)) { + if (NAME_FETCH_AAAA(adbname)) { query_pending |= DNS_ADBFIND_INET6; } @@ -3151,7 +3139,6 @@ post_copy: * Attach to the name's query list if there are queries * already running, and we have been asked to. */ - want_event = true; if (!FIND_WANTEVENT(find)) { want_event = false; } @@ -3165,16 +3152,15 @@ post_copy: want_event = false; } if (want_event) { + bool empty; find->adbname = adbname; find->name_bucket = bucket; - bool empty = ISC_LIST_EMPTY(adbname->finds); + empty = ISC_LIST_EMPTY(adbname->finds); ISC_LIST_APPEND(adbname->finds, find, plink); find->query_pending = (query_pending & wanted_addresses); find->flags &= ~DNS_ADBFIND_ADDRESSMASK; find->flags |= (find->query_pending & DNS_ADBFIND_ADDRESSMASK); - DP(DEF_LEVEL, - "createfind: attaching find %p to adbname " - "%p %d", + DP(DEF_LEVEL, "createfind: attaching find %p to adbname %p %d", find, adbname, empty); } else { /* @@ -3207,22 +3193,20 @@ post_copy: out: if (find != NULL) { - *findp = find; - if (want_event) { - isc_task_t *taskp; + isc_task_t *taskp = NULL; INSIST((find->flags & DNS_ADBFIND_ADDRESSMASK) != 0); - taskp = NULL; isc_task_attach(task, &taskp); find->event.ev_sender = taskp; find->event.ev_action = action; find->event.ev_arg = arg; } + + *findp = find; } UNLOCK(&adb->namelocks[bucket]); - return (result); } @@ -4011,8 +3995,8 @@ fetch_name(dns_adbname_t *adbname, bool start_at_zone, unsigned int depth, adb = adbname->adb; INSIST(DNS_ADB_VALID(adb)); - INSIST((type == dns_rdatatype_a && !NAME_FETCH_V4(adbname)) || - (type == dns_rdatatype_aaaa && !NAME_FETCH_V6(adbname))); + INSIST((type == dns_rdatatype_a && !NAME_FETCH_A(adbname)) || + (type == dns_rdatatype_aaaa && !NAME_FETCH_AAAA(adbname))); adbname->fetch_err = FIND_ERR_NOTFOUND; From 128068fde20b9474614b1766b72c99ef3cc8bd2d Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 6 Oct 2021 13:42:09 -0700 Subject: [PATCH 2/4] check for loops in ADB finds If an ADB find is started on behalf of a resolver fetch, and fails to find any addresses but has a pending resolver fetch associated with it, then we need to check whether the fetch it's waiting on is the one that created it. If so, it can never finish and needs to be terminated. --- lib/dns/resolver.c | 98 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 23 deletions(-) diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 68839ae57f..eb752e224b 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -408,8 +408,8 @@ struct fetchctx { /*% * Fetch-local statistics for detailed logging. */ - isc_result_t result; /*%< fetch result */ - isc_result_t vresult; /*%< validation result */ + isc_result_t result; /*%< fetch result */ + isc_result_t vresult; /*%< validation result */ int exitline; isc_time_t start; uint64_t duration; @@ -1786,7 +1786,7 @@ resquery_senddone(isc_result_t eresult, isc_region_t *region, void *arg) { case ISC_R_NOPERM: case ISC_R_ADDRNOTAVAIL: case ISC_R_CONNREFUSED: - /* No route to remote. */ + /* No route to remote. */ FCTXTRACE3("query canceled in resquery_senddone(): " "no route to host; no response", eresult); @@ -3209,20 +3209,51 @@ sort_finds(dns_adbfindlist_t *findlist, unsigned int bias) { *findlist = sorted; } +/* + * Return true iff the ADB find has a pending fetch for 'type'. This is + * used to find out whether we're in a loop, where a fetch is waiting for a + * find which is waiting for that same fetch. + * + * Note: This could be done with either an equivalence check (e.g., + * query_pending == DNS_ADBFIND_INET) or with a bit check, as below. If + * we checked for equivalence, that would mean we could only detect a loop + * when there is exactly one pending fetch, and we're it. If there were + * pending fetches for *both* address families, then a loop would be + * undetected. + * + * However, using a bit check means that in theory, an ADB find might be + * aborted that could have succeeded, if the other fetch had returned an + * answer. + * + * Since there's a good chance the server is broken and won't answer either + * query, and since an ADB find with two pending fetches is a very rare + * occurrance anyway, we regard this theoretical SERVFAIL as the lesser + * evil. + */ +static inline bool +waiting_for(dns_adbfind_t *find, dns_rdatatype_t type) { + switch (type) { + case dns_rdatatype_a: + return ((find->query_pending & DNS_ADBFIND_INET) != 0); + case dns_rdatatype_aaaa: + return ((find->query_pending & DNS_ADBFIND_INET6) != 0); + default: + return (false); + } +} + static void findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, unsigned int options, unsigned int flags, isc_stdtime_t now, bool *overquota, bool *need_alternate, unsigned int *no_addresses) { - dns_adbaddrinfo_t *ai; - dns_adbfind_t *find; - dns_resolver_t *res; - bool unshared; + dns_adbaddrinfo_t *ai = NULL; + dns_adbfind_t *find = NULL; + dns_resolver_t *res = fctx->res; + bool unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0); isc_result_t result; - fetchctx_t *ev_fctx = NULL; FCTXTRACE("FINDNAME"); - res = fctx->res; - unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0); + /* * If this name is a subdomain of the query domain, tell * the ADB to start looking using zone/hint data. This keeps us @@ -3239,11 +3270,10 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, /* * See what we know about this address. */ - find = NULL; - fctx_attach(fctx, &ev_fctx); + fctx_attach(fctx, &(fetchctx_t *){ NULL }); result = dns_adb_createfind( fctx->adb, res->buckets[fctx->bucketnum].task, fctx_finddone, - ev_fctx, name, fctx->name, fctx->type, options, now, NULL, + fctx, name, fctx->name, fctx->type, options, now, NULL, res->view->dstport, fctx->depth + 1, fctx->qc, &find); isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, @@ -3267,7 +3297,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, "is a CNAME, while resolving '%s'", namebuf, fctx->info); } - fctx_detach(&ev_fctx); + fctx_detach(&fctx); return; } @@ -3298,12 +3328,33 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, /* * We don't know any of the addresses for this name. + * + * The find may be waiting on a resolver fetch for a server + * address. We need to make sure it isn't waiting on *this* + * fetch, because if it is, we won't be answering it and it + * won't be answering us. + */ + if (waiting_for(find, fctx->type) && dns_name_equal(name, fctx->name)) { + fctx->adberr++; + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, + DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO, + "loop detected resolving '%s'", fctx->info); + + if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { + atomic_fetch_add_relaxed(&fctx->pending, 1); + dns_adb_cancelfind(find); + } else { + dns_adb_destroyfind(&find); + fctx_detach(&fctx); + } + return; + } + + /* + * We may be waiting for another fetch to complete, and + * we'll get an event later when the find has what it needs. */ if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { - /* - * We're looking for them and will get an - * event about it later. - */ atomic_fetch_add_relaxed(&fctx->pending, 1); /* @@ -3323,17 +3374,18 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, return; } + /* + * No addresses and no pending events: the find failed. + */ if ((find->options & DNS_ADBFIND_OVERQUOTA) != 0) { if (overquota != NULL) { *overquota = true; } fctx->quotacount++; /* quota exceeded */ } else if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) { - fctx->lamecount++; /* cached lame server - */ + fctx->lamecount++; /* cached lame server */ } else { - fctx->adberr++; /* unreachable server, - etc. */ + fctx->adberr++; /* unreachable server, etc. */ } /* @@ -3347,7 +3399,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, *need_alternate = true; } dns_adb_destroyfind(&find); - fctx_detach(&ev_fctx); + fctx_detach(&fctx); } static bool From 61ac32a9893aca42cf8cb02699c63c53cd7df9ad Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Mon, 18 Oct 2021 23:14:28 -0700 Subject: [PATCH 3/4] add a system test with an ADB loop Add a lame delegation to lame.example.org with only an A record in the additional section; on failure, this will trigger a retry with AAAA, which will loop. Test that dig returns SERVFAIL, in addition to confirming that named doesn't hang on shutdown. --- bin/tests/system/resolver/ans2/ans.pl | 5 +++++ bin/tests/system/resolver/ans3/ans.pl | 5 +++++ bin/tests/system/resolver/tests.sh | 8 ++++++++ 3 files changed, 18 insertions(+) diff --git a/bin/tests/system/resolver/ans2/ans.pl b/bin/tests/system/resolver/ans2/ans.pl index cc923e3dfb..d9f6f53508 100644 --- a/bin/tests/system/resolver/ans2/ans.pl +++ b/bin/tests/system/resolver/ans2/ans.pl @@ -102,6 +102,11 @@ for (;;) { } elsif ($qname =~ /example\.net/) { $packet->push("authority", new Net::DNS::RR("example.net 300 NS ns.example.net")); $packet->push("additional", new Net::DNS::RR("ns.example.net 300 A 10.53.0.3")); + } elsif ($qname =~ /lame\.example\.org/) { + $packet->header->ad(0); + $packet->header->aa(0); + $packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org")); + $packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3")); } elsif ($qname =~ /sub\.example\.org/) { # Data for CNAME/DNAME filtering. The final answers are # expected to be accepted regardless of the filter setting. diff --git a/bin/tests/system/resolver/ans3/ans.pl b/bin/tests/system/resolver/ans3/ans.pl index be8708ddf5..15d4c98780 100644 --- a/bin/tests/system/resolver/ans3/ans.pl +++ b/bin/tests/system/resolver/ans3/ans.pl @@ -98,6 +98,11 @@ for (;;) { } elsif ($qname =~ /^nxdomain\.example\.net$/i) { $packet->header->aa(1); $packet->header->rcode(NXDOMAIN); + } elsif ($qname =~ /lame\.example\.org/) { + $packet->header->ad(0); + $packet->header->aa(0); + $packet->push("authority", new Net::DNS::RR("lame.example.org 300 NS ns.lame.example.org")); + $packet->push("additional", new Net::DNS::RR("ns.lame.example.org 300 A 10.53.0.3")); } elsif ($qname eq "cname.sub.example.org") { $packet->push("answer", new Net::DNS::RR($qname . diff --git a/bin/tests/system/resolver/tests.sh b/bin/tests/system/resolver/tests.sh index 165b196b70..d8485f29ed 100755 --- a/bin/tests/system/resolver/tests.sh +++ b/bin/tests/system/resolver/tests.sh @@ -889,5 +889,13 @@ grep 'cname-next\.example\.net\..*CNAME.http-server\.example\.net\.' dig.out.ns7 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` +echo_i "check ADB find loops are detected ($n)" +ret=0 +$DIG $DIGOPTS +tcp +tries=1 +timeout=5 @10.53.0.1 fake.lame.example.org > dig.out.ns1.${n} || ret=1 +grep "status: SERVFAIL" dig.out.ns1.${n} > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=`expr $status + $ret` + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 From 1ed928dad76a9b6624baf1b41299d5221f000008 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Mon, 18 Oct 2021 23:20:32 -0700 Subject: [PATCH 4/4] CHANGES and release note for [GL #2927] --- CHANGES | 7 +++++++ doc/notes/notes-current.rst | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/CHANGES b/CHANGES index b6167e471c..d76f0d8c69 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +5746. [bug] A lame server delegation could lead to a loop in which + a resolver fetch depends on an ADB find which depends + on the same resolver fetch. Previously, this would + cause the fetch to hang until timing out, but after + change #5730 it would hang forever. The condition is + now detected and avoided. [GL #2927] + 5745. [bug] Fetch context objects now use attach/detach semantics to make it easier to find and debug reference-counting errors, and several such errors diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 98874357fd..28127fb2e7 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -112,3 +112,8 @@ Bug Fixes - Logfiles using ``timestamp``-style suffixes were not always correctly removed when the number of files exceeded the limit set by ``versions``. :gl:`#828` + +- Some lame delegations could trigger a dependency loop, in which a + resolver fetch was waiting for a name server address lookup which was + waiting for the same resolver fetch. This could cause a recursive lookup + to hang until timing out. This now detected and avoided. :gl:`#2927`